diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp index 5027e142c4284f..b1a92db53bda52 100644 --- a/clang/test/OpenMP/amdgcn_target_codegen.cpp +++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc @@ -9,25 +10,16 @@ #define N 1000 int test_amdgcn_target_tid_threads() { -// CHECK-LABEL: define weak_odr protected amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads - int arr[N]; - -// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true) #pragma omp target for (int i = 0; i < N; i++) { arr[i] = 1; } - return arr[0]; } int test_amdgcn_target_tid_threads_simd() { -// CHECK-LABEL: define weak_odr protected amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads_simd - int arr[N]; - -// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false) #pragma omp target simd for (int i = 0; i < N; i++) { arr[i] = 1; @@ -36,3 +28,87 @@ int test_amdgcn_target_tid_threads_simd() { } #endif +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr +// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I_ASCAST]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP3]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// CHECK: for.end: +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr +// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2, i1 false) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000 +// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// CHECK: omp.inner.for.end: +// CHECK-NEXT: store i32 1000, ptr [[I_ASCAST]], align 4 +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp index bec7f3ec225991..b0f1937a35437d 100644 --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -1,43 +1,17 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefixes=CHECK-64 %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefixes=CHECK-32 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefixes=CHECK-32-EX %s // expected-no-diagnostics #ifndef HEADER #define HEADER int a; -// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 -// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 - void foo() { -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for simd if(a) for (int i = 0; i < 10; ++i) ; @@ -60,27 +34,6 @@ void foo() { for (int i = 0; i < 10; ++i) ; int a; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for lastprivate(a) for (int i = 0; i < 10; ++i) a = i; @@ -102,27 +55,6 @@ int a; #pragma omp target teams distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init( -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] #pragma omp target teams { int b; @@ -162,27 +94,6 @@ int a; #pragma omp distribute parallel for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] #pragma omp target teams #pragma omp distribute parallel for for (int i = 0; i < 10; ++i) @@ -211,26 +122,6 @@ int a; #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[DISTR_FULL]] -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] #pragma omp target #pragma omp teams #pragma omp distribute parallel for @@ -266,20 +157,6 @@ int a; #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] #pragma omp target parallel for if(a) for (int i = 0; i < 10; ++i) ; @@ -301,27 +178,6 @@ int a; #pragma omp target parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] #pragma omp target parallel if(a) #pragma omp for simd for (int i = 0; i < 10; ++i) @@ -350,26 +206,6 @@ int a; #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] #pragma omp target #pragma omp parallel #pragma omp for simd ordered @@ -405,18 +241,6 @@ int a; #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-DAG: [[FULL]] #pragma omp target #pragma omp parallel for for (int i = 0; i < 10; ++i) @@ -448,4 +272,26459 @@ int a; } #endif - +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15 +// CHECK-64-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED16:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-64: omp_if.then: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP11]] to i1 +// CHECK-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 +// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK-64-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK-64-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP12]] to i8* +// CHECK-64-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK-64-NEXT: [[TMP20:%.*]] = zext i1 [[TOBOOL4]] to i32 +// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP20]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP21]], i64 3), !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP28]], 9 +// CHECK-64-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK-64: cond.true8: +// CHECK-64-NEXT: br label [[COND_END10:%.*]] +// CHECK-64: cond.false9: +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: br label [[COND_END10]] +// CHECK-64: cond.end10: +// CHECK-64-NEXT: [[COND11:%.*]] = phi i32 [ 9, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK-64-NEXT: store i32 [[COND11]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP131:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-64: omp_if.else: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK-64: omp.inner.for.cond12: +// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP31]], 10 +// CHECK-64-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END30:%.*]] +// CHECK-64: omp.inner.for.body14: +// CHECK-64-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP33:%.*]] = zext i32 [[TMP32]] to i64 +// CHECK-64-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP35:%.*]] = zext i32 [[TMP34]] to i64 +// CHECK-64-NEXT: [[TMP36:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK-64-NEXT: [[CONV17:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED16]] to i8* +// CHECK-64-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL15]] to i8 +// CHECK-64-NEXT: store i8 [[FROMBOOL18]], i8* [[CONV17]], align 1 +// CHECK-64-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED16]], align 8 +// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP39:%.*]] = inttoptr i64 [[TMP33]] to i8* +// CHECK-64-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 8 +// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP35]] to i8* +// CHECK-64-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 8 +// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP43:%.*]] = inttoptr i64 [[TMP37]] to i8* +// CHECK-64-NEXT: store i8* [[TMP43]], i8** [[TMP42]], align 8 +// CHECK-64-NEXT: [[TMP44:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP44]] to i1 +// CHECK-64-NEXT: [[TMP45:%.*]] = zext i1 [[TOBOOL20]] to i32 +// CHECK-64-NEXT: [[TMP46:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS19]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP45]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP46]], i64 3) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] +// CHECK-64: omp.inner.for.inc21: +// CHECK-64-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK-64-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK-64-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] +// CHECK-64-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP53]], 9 +// CHECK-64-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] +// CHECK-64: cond.true26: +// CHECK-64-NEXT: br label [[COND_END28:%.*]] +// CHECK-64: cond.false27: +// CHECK-64-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END28]] +// CHECK-64: cond.end28: +// CHECK-64-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP54]], [[COND_FALSE27]] ] +// CHECK-64-NEXT: store i32 [[COND29]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP55]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP134:![0-9]+]] +// CHECK-64: omp.inner.for.end30: +// CHECK-64-NEXT: br label [[OMP_IF_END]] +// CHECK-64: omp_if.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 +// CHECK-64-NEXT: br i1 [[TMP57]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-64: omp_if.then: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] +// CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP137:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-64: omp_if.else: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5:%.*]] +// CHECK-64: omp.inner.for.cond5: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV6:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP15]] +// CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END14:%.*]] +// CHECK-64: omp.inner.for.body8: +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK-64-NEXT: store i32 [[ADD10]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE11:%.*]] +// CHECK-64: omp.body.continue11: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC12:%.*]] +// CHECK-64: omp.inner.for.inc12: +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5]], !llvm.loop [[LOOP139:![0-9]+]] +// CHECK-64: omp.inner.for.end14: +// CHECK-64-NEXT: br label [[OMP_IF_END]] +// CHECK-64: omp_if.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK-64-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-64: omp_if.then: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] +// CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP141:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-64: omp_if.else: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5:%.*]] +// CHECK-64: omp.inner.for.cond5: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV6:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP15]] +// CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END14:%.*]] +// CHECK-64: omp.inner.for.body8: +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK-64-NEXT: store i32 [[ADD10]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE11:%.*]] +// CHECK-64: omp.body.continue11: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC12:%.*]] +// CHECK-64: omp.inner.for.inc12: +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5]], !llvm.loop [[LOOP143:![0-9]+]] +// CHECK-64: omp.inner.for.end14: +// CHECK-64-NEXT: br label [[OMP_IF_END]] +// CHECK-64: omp_if.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK-64-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__6 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__7(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__8 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__12 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__12 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__13(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__13 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__14 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__14 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37 +// CHECK-64-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__15 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK-64-NEXT: [[A1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) +// CHECK-64-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP10]], i32* [[CONV3]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 +// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK-64-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP18:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64)* @__omp_outlined__16 to i8*), i8* null, i8** [[TMP18]], i64 3) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP25]], 9 +// CHECK-64-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK-64: cond.true7: +// CHECK-64-NEXT: br label [[COND_END9:%.*]] +// CHECK-64: cond.false8: +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END9]] +// CHECK-64: cond.end9: +// CHECK-64-NEXT: [[COND10:%.*]] = phi i32 [ 9, [[COND_TRUE7]] ], [ [[TMP26]], [[COND_FALSE8]] ] +// CHECK-64-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-64-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-64: .omp.lastprivate.then: +// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-64-NEXT: store i32 [[TMP30]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK-64: .omp.lastprivate.done: +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i64 4) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__16 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[A3:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV4:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[TMP8]], i32* [[A3]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-64: .omp.lastprivate.then: +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK-64: .omp.lastprivate.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__17(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__17 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__18 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__18 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__19(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__19 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__20 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__20 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__21(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__21 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__22 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__22 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__23(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__23 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__24 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__24 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__25(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__25 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__26 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__26 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__27(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__27 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__28 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__28 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__29(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__29 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__30 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__30 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__31(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__31 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[B:%.*]] = alloca [3 x i32], align 4 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [3 x i32]* [[B]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i64 12, i1 false) +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK-64-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK-64-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__32 to i8*), i8* null, i8** [[TMP15]], i64 2), !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-64-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__32 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__33(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__33 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK-64-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to i32** +// CHECK-64-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) +// CHECK-64-NEXT: [[B_ON_STACK:%.*]] = bitcast i8* [[B]] to i32* +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK-64-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK-64-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__34 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__34_wrapper to i8*), i8** [[TMP15]], i64 2), !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: store i32* [[B_ON_STACK]], i32** [[C_ON_STACK]], align 8 +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[B]], i64 4) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[C]], i64 8) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__34 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__34_wrapper +// CHECK-64-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK-64-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 +// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i64* +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* +// CHECK-64-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK-64-NEXT: call void @__omp_outlined__34(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i64 [[TMP5]], i64 [[TMP8]]) #[[ATTR2]] +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__35(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__35 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__36 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__36 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__37(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__37 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__38 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__38 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__39(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__39 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__40 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__40 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__41(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__41 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__42 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__42 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__43(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__43 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__44 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__44 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__45(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__45 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__46 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__46 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__47(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__47 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__48 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__48 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__49(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__49 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__50 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__50 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__51(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__51 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__52 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__52 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__53(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__53 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__54 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__54 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__55(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__55 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__56 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__56 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__57(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__57 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__58 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__58 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__59(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__59 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__60 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__60 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__61(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__61 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__62 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__62 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__63(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__63 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__64 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__64 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__65(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__65 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__66 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__66 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__67(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__67 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__68 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__68 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155 +// CHECK-64-SAME: () #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__69(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__69 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__70 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-64: cond.true5: +// CHECK-64-NEXT: br label [[COND_END7:%.*]] +// CHECK-64: cond.false6: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END7]] +// CHECK-64: cond.end7: +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__70 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160 +// CHECK-64-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__71 to i8*), i8* null, i8** [[TMP4]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__71 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__72 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__72 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__73 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__73 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__74 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__74 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__75 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__75 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP262:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__76 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__76 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP265:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__77 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__77 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP268:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181 +// CHECK-64-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__78 to i8*), i8* null, i8** [[TMP4]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__78 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP271:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__79 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__79 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP274:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__80 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__80 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP277:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__81 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__81 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP280:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__82 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__82 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP283:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__83 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__83 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP286:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__84 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__84 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP289:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__85 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__85 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP292:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__86 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__86 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP295:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__87 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__87 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP298:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__88 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__88 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP301:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__89 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__89 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP304:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__90 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__90 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP307:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__91 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__91 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP310:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__92 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__92 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__93 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__93 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__94 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__94 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__95 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__95 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP313:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__96 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__96 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP316:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__97 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__97 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP319:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268 +// CHECK-64-SAME: () #[[ATTR8]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__98 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__98 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP322:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15 +// CHECK-32-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED16:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32: omp_if.then: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK-32-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 +// CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK-32-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK-32-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 +// CHECK-32-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL4]] to i32 +// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP19]], i32 3), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP26]], 9 +// CHECK-32-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK-32: cond.true8: +// CHECK-32-NEXT: br label [[COND_END10:%.*]] +// CHECK-32: cond.false9: +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: br label [[COND_END10]] +// CHECK-32: cond.end10: +// CHECK-32-NEXT: [[COND11:%.*]] = phi i32 [ 9, [[COND_TRUE8]] ], [ [[TMP27]], [[COND_FALSE9]] ] +// CHECK-32-NEXT: store i32 [[COND11]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP131:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-32: omp_if.else: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK-32: omp.inner.for.cond12: +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP29]], 10 +// CHECK-32-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END30:%.*]] +// CHECK-32: omp.inner.for.body14: +// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP32:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP32]] to i1 +// CHECK-32-NEXT: [[CONV17:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED16]] to i8* +// CHECK-32-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL15]] to i8 +// CHECK-32-NEXT: store i8 [[FROMBOOL18]], i8* [[CONV17]], align 1 +// CHECK-32-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED16]], align 4 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP30]] to i8* +// CHECK-32-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 +// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP31]] to i8* +// CHECK-32-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 +// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP33]] to i8* +// CHECK-32-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 +// CHECK-32-NEXT: [[TMP40:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP40]] to i1 +// CHECK-32-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL20]] to i32 +// CHECK-32-NEXT: [[TMP42:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS19]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP42]], i32 3) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] +// CHECK-32: omp.inner.for.inc21: +// CHECK-32-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK-32-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK-32-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK-32-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP49]], 9 +// CHECK-32-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] +// CHECK-32: cond.true26: +// CHECK-32-NEXT: br label [[COND_END28:%.*]] +// CHECK-32: cond.false27: +// CHECK-32-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END28]] +// CHECK-32: cond.end28: +// CHECK-32-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP50]], [[COND_FALSE27]] ] +// CHECK-32-NEXT: store i32 [[COND29]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP51]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP134:![0-9]+]] +// CHECK-32: omp.inner.for.end30: +// CHECK-32-NEXT: br label [[OMP_IF_END]] +// CHECK-32: omp_if.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK-32-NEXT: br i1 [[TMP53]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32: omp_if.then: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP137:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-32: omp_if.else: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] +// CHECK-32: omp.inner.for.cond2: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] +// CHECK-32: omp.inner.for.body4: +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] +// CHECK-32: omp.body.continue7: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] +// CHECK-32: omp.inner.for.inc8: +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP139:![0-9]+]] +// CHECK-32: omp.inner.for.end10: +// CHECK-32-NEXT: br label [[OMP_IF_END]] +// CHECK-32: omp_if.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32: omp_if.then: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP141:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-32: omp_if.else: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] +// CHECK-32: omp.inner.for.cond2: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] +// CHECK-32: omp.inner.for.body4: +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] +// CHECK-32: omp.body.continue7: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] +// CHECK-32: omp.inner.for.inc8: +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP143:![0-9]+]] +// CHECK-32: omp.inner.for.end10: +// CHECK-32-NEXT: br label [[OMP_IF_END]] +// CHECK-32: omp_if.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__6 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__7(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__8 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__12 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__12 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__13(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__13 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__14 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__14 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37 +// CHECK-32-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__15 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[A1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to i8* +// CHECK-32-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__16 to i8*), i8* null, i8** [[TMP16]], i32 3) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP23]], 9 +// CHECK-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK-32: cond.true6: +// CHECK-32-NEXT: br label [[COND_END8:%.*]] +// CHECK-32: cond.false7: +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END8]] +// CHECK-32: cond.end8: +// CHECK-32-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP24]], [[COND_FALSE7]] ] +// CHECK-32-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32: .omp.lastprivate.then: +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-32-NEXT: store i32 [[TMP28]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK-32: .omp.lastprivate.done: +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i32 4) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__16 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[A1]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32: .omp.lastprivate.then: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK-32: .omp.lastprivate.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__17(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__17 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__18 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__18 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__19(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__19 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__20 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__20 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__21(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__21 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__22 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__22 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__23(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__23 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__24 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__24 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__25(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__25 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__26 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__26 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__27(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__27 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__28 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__28 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__29(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__29 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__30 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__30 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__31(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__31 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[B:%.*]] = alloca [3 x i32], align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = bitcast [3 x i32]* [[B]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i32 12, i1 false) +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK-32-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__32 to i8*), i8* null, i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK-32-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__32 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__33(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__33 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to i32** +// CHECK-32-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: [[B_ON_STACK:%.*]] = bitcast i8* [[B]] to i32* +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK-32-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__34 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__34_wrapper to i8*), i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: store i32* [[B_ON_STACK]], i32** [[C_ON_STACK]], align 4 +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[B]], i32 4) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[C]], i32 4) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__34 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__34_wrapper +// CHECK-32-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK-32-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32* +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__34(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32 [[TMP5]], i32 [[TMP8]]) #[[ATTR2]] +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__35(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__35 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__36 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__36 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__37(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__37 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__38 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__38 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__39(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__39 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__40 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__40 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__41(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__41 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__42 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__42 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__43(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__43 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__44 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__44 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__45(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__45 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__46 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__46 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__47(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__47 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__48 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__48 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__49(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__49 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__50 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__50 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__51(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__51 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__52 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__52 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__53(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__53 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__54 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__54 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__55(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__55 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__56 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__56 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__57(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__57 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__58 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__58 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__59(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__59 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__60 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__60 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__61(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__61 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__62 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__62 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__63(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__63 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__64 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__64 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__65(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__65 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__66 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__66 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__67(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__67 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__68 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__68 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155 +// CHECK-32-SAME: () #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__69(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__69 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__70 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32: cond.true5: +// CHECK-32-NEXT: br label [[COND_END7:%.*]] +// CHECK-32: cond.false6: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END7]] +// CHECK-32: cond.end7: +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__70 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160 +// CHECK-32-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__71 to i8*), i8* null, i8** [[TMP4]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__71 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__72 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__72 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__73 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__73 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__74 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__74 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__75 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__75 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP262:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__76 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__76 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP265:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__77 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__77 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP268:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181 +// CHECK-32-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__78 to i8*), i8* null, i8** [[TMP4]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__78 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP271:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__79 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__79 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP274:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__80 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__80 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP277:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__81 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__81 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP280:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__82 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__82 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP283:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__83 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__83 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP286:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__84 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__84 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP289:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__85 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__85 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP292:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__86 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__86 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP295:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__87 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__87 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP298:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__88 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__88 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP301:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__89 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__89 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP304:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__90 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__90 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP307:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__91 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__91 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP310:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__92 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__92 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__93 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__93 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__94 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__94 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__95 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__95 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP313:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__96 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__96 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP316:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__97 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__97 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP319:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268 +// CHECK-32-SAME: () #[[ATTR8]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__98 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__98 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP322:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15 +// CHECK-32-EX-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-EX-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED16:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32-EX: omp_if.then: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK-32-EX-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL4]] to i32 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP19]], i32 3), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP26]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK-32-EX: cond.true8: +// CHECK-32-EX-NEXT: br label [[COND_END10:%.*]] +// CHECK-32-EX: cond.false9: +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: br label [[COND_END10]] +// CHECK-32-EX: cond.end10: +// CHECK-32-EX-NEXT: [[COND11:%.*]] = phi i32 [ 9, [[COND_TRUE8]] ], [ [[TMP27]], [[COND_FALSE9]] ] +// CHECK-32-EX-NEXT: store i32 [[COND11]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP131:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-32-EX: omp_if.else: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK-32-EX: omp.inner.for.cond12: +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP29]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END30:%.*]] +// CHECK-32-EX: omp.inner.for.body14: +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP32:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP32]] to i1 +// CHECK-32-EX-NEXT: [[CONV17:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED16]] to i8* +// CHECK-32-EX-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL15]] to i8 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL18]], i8* [[CONV17]], align 1 +// CHECK-32-EX-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED16]], align 4 +// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP30]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 +// CHECK-32-EX-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP31]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 +// CHECK-32-EX-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP33]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 +// CHECK-32-EX-NEXT: [[TMP40:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP40]] to i1 +// CHECK-32-EX-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL20]] to i32 +// CHECK-32-EX-NEXT: [[TMP42:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS19]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP42]], i32 3) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] +// CHECK-32-EX: omp.inner.for.inc21: +// CHECK-32-EX-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK-32-EX-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK-32-EX-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK-32-EX-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP49]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] +// CHECK-32-EX: cond.true26: +// CHECK-32-EX-NEXT: br label [[COND_END28:%.*]] +// CHECK-32-EX: cond.false27: +// CHECK-32-EX-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END28]] +// CHECK-32-EX: cond.end28: +// CHECK-32-EX-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP50]], [[COND_FALSE27]] ] +// CHECK-32-EX-NEXT: store i32 [[COND29]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP51]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP134:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end30: +// CHECK-32-EX-NEXT: br label [[OMP_IF_END]] +// CHECK-32-EX: omp_if.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP53]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32-EX: omp_if.then: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP137:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-32-EX: omp_if.else: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] +// CHECK-32-EX: omp.inner.for.cond2: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] +// CHECK-32-EX: omp.inner.for.body4: +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] +// CHECK-32-EX: omp.body.continue7: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] +// CHECK-32-EX: omp.inner.for.inc8: +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP139:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end10: +// CHECK-32-EX-NEXT: br label [[OMP_IF_END]] +// CHECK-32-EX: omp_if.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32-EX: omp_if.then: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP141:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-32-EX: omp_if.else: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] +// CHECK-32-EX: omp.inner.for.cond2: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] +// CHECK-32-EX: omp.inner.for.body4: +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] +// CHECK-32-EX: omp.body.continue7: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] +// CHECK-32-EX: omp.inner.for.inc8: +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP143:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end10: +// CHECK-32-EX-NEXT: br label [[OMP_IF_END]] +// CHECK-32-EX: omp_if.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__6 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__7(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__8 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__12 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__12 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__13(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__13 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__14 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__14 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37 +// CHECK-32-EX-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__15 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[A1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__16 to i8*), i8* null, i8** [[TMP16]], i32 3) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP23]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK-32-EX: cond.true6: +// CHECK-32-EX-NEXT: br label [[COND_END8:%.*]] +// CHECK-32-EX: cond.false7: +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END8]] +// CHECK-32-EX: cond.end8: +// CHECK-32-EX-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP24]], [[COND_FALSE7]] ] +// CHECK-32-EX-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32-EX: .omp.lastprivate.then: +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP28]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK-32-EX: .omp.lastprivate.done: +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i32 4) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__16 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32-EX: .omp.lastprivate.then: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK-32-EX: .omp.lastprivate.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__17(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__17 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__18 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__18 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__19(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__19 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__20 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__20 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__21(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__21 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__22 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__22 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__23(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__23 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__24 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__24 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__25(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__25 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__26 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__26 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__27(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__27 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__28 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__28 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__29(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__29 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__30 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__30 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__31(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__31 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[B:%.*]] = alloca [3 x i32], align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = bitcast [3 x i32]* [[B]] to i8* +// CHECK-32-EX-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i32 12, i1 false) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__32 to i8*), i8* null, i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__32 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__33(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__33 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to i32** +// CHECK-32-EX-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: [[B_ON_STACK:%.*]] = bitcast i8* [[B]] to i32* +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__34 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__34_wrapper to i8*), i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: store i32* [[B_ON_STACK]], i32** [[C_ON_STACK]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[B]], i32 4) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[C]], i32 4) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__34 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__34_wrapper +// CHECK-32-EX-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32* +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__34(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32 [[TMP5]], i32 [[TMP8]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__35(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__35 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__36 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__36 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__37(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__37 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__38 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__38 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__39(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__39 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__40 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__40 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__41(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__41 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__42 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__42 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__43(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__43 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__44 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__44 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__45(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__45 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__46 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__46 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__47(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__47 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__48 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__48 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__49(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__49 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__50 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__50 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__51(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__51 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__52 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__52 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__53(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__53 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__54 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__54 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__55(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__55 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__56 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__56 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__57(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__57 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__58 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__58 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__59(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__59 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__60 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__60 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__61(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__61 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__62 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__62 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__63(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__63 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__64 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__64 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__65(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__65 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__66 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__66 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__67(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__67 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__68 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__68 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155 +// CHECK-32-EX-SAME: () #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__69(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__69 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__70 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK-32-EX: cond.true5: +// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] +// CHECK-32-EX: cond.false6: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END7]] +// CHECK-32-EX: cond.end7: +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__70 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160 +// CHECK-32-EX-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__71 to i8*), i8* null, i8** [[TMP4]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__71 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__72 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__72 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__73 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__73 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__74 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__74 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__75 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__75 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP262:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__76 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__76 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP265:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__77 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__77 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP268:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181 +// CHECK-32-EX-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__78 to i8*), i8* null, i8** [[TMP4]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__78 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP271:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__79 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__79 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP274:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__80 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__80 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP277:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__81 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__81 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP280:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__82 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__82 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP283:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__83 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__83 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP286:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__84 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__84 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP289:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__85 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__85 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP292:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__86 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__86 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP295:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__87 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__87 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP298:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__88 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__88 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP301:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__89 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__89 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP304:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__90 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__90 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP307:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__91 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__91 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP310:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__92 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__92 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__93 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__93 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32-EX: omp.loop.exit: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__94 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__94 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__95 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__95 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP313:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__96 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__96 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP316:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__97 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__97 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP319:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268 +// CHECK-32-EX-SAME: () #[[ATTR8]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__98 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__98 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP322:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: ret void +// diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp index 0f14d687bf79bd..b2cf555cafacfe 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp @@ -1,24 +1,22 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK45-64 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32-EX // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-64 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32-EX // expected-no-diagnostics #ifndef HEADER #define HEADER // Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l29}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak protected constant i8 2 template tx ftemplate(int n) { @@ -53,5 +51,955 @@ int bar(int n){ return a; } -// CHECK-NOT: call void @__kmpc_push_proc_bind #endif +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 +// CHECK45-64-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK45-64-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK45-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK45-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK45-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-64-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK45-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// CHECK45-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK45-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK45-64-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK45-64-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK45-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK45-64-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP6]] to ptr +// CHECK45-64-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 +// CHECK45-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 +// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK45-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-64-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK45-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 +// CHECK45-32-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK45-32-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +// CHECK45-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-32-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK45-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// CHECK45-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr +// CHECK45-32-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK45-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK45-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK45-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 +// CHECK45-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK45-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-32-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK45-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 +// CHECK45-32-EX-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK45-32-EX-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +// CHECK45-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-32-EX-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK45-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// CHECK45-32-EX-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr +// CHECK45-32-EX-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK45-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK45-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-32-EX-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK45-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 +// CHECK-64-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK-64-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// CHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-64-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP6]] to ptr +// CHECK-64-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK-64-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr +// CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 +// CHECK-32-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK-32-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +// CHECK-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-32-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// CHECK-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr +// CHECK-32-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-32-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 +// CHECK-32-EX-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 +// CHECK-32-EX-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-32-EX-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 +// CHECK-32-EX-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr +// CHECK-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-32-EX-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK-32-EX-NEXT: ret void +// diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index b9e2c48001d60b..56966786b9a736 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -1,20 +1,17 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-64 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32 +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32-EX // expected-no-diagnostics #ifndef HEADER #define HEADER // Check for the data transfer medium in shared memory to transfer the reduction list to the first warp. -// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = weak addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32] // Check that the execution mode of all 3 target regions is set to Spmd Mode. -// CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak protected constant i8 2 template tx ftemplate(int n) { @@ -52,741 +49,2246 @@ int bar(int n){ return a; } -// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}( -// -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) -// -// // define internal void [[PFN]]( -// CHECK: store double {{[0\.e\+]+}}, double* [[E:%.+]], align -// CHECK: [[EV:%.+]] = load double, double* [[E]], align -// CHECK: [[ADD:%.+]] = fadd double [[EV]], 5 -// CHECK: store double [[ADD]], double* [[E]], align -// CHECK: [[PTR1:%.+]] = getelementptr inbounds [[RLT:.+]], [1 x i8*]* [[RL:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[E_CAST:%.+]] = bitcast double* [[E]] to i8* -// CHECK: store i8* [[E_CAST]], i8** [[PTR1]], align -// CHECK: [[ARG_RL:%.+]] = bitcast [[RLT]]* [[RL]] to i8* -// CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @{{.+}}, i32 {{.+}}, i32 1, i{{32|64}} {{4|8}}, i8* [[ARG_RL]], void (i8*, i16, i16, i16)* [[SHUFFLE_REDUCE_FN:@.+]], void (i8*, i32)* [[WARP_COPY_FN:@.+]]) -// CHECK: [[CMP:%.+]] = icmp eq i32 [[RET]], 1 -// CHECK: br i1 [[CMP]], label -// CHECK: [[E_INV:%.+]] = load double, double* [[E_IN:%.+]], align -// CHECK: [[EV:%.+]] = load double, double* [[E]], align -// CHECK: [[ADD:%.+]] = fadd double [[E_INV]], [[EV]] -// CHECK: store double [[ADD]], double* [[E_IN]], align -// CHECK: call void @__kmpc_nvptx_end_reduce_nowait( -// CHECK: br label -// -// CHECK: ret -// // Reduction function -// CHECK: define internal void [[REDUCTION_FUNC:@.+]](i8* noundef %0, i8* noundef %1) -// CHECK: [[VAR_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[VAR_RHS_VOID:%.+]] = load i8*, i8** [[VAR_RHS_REF]], -// CHECK: [[VAR_RHS:%.+]] = bitcast i8* [[VAR_RHS_VOID]] to double* -// -// CHECK: [[VAR_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[VAR_LHS_VOID:%.+]] = load i8*, i8** [[VAR_LHS_REF]], -// CHECK: [[VAR_LHS:%.+]] = bitcast i8* [[VAR_LHS_VOID]] to double* -// -// CHECK: [[VAR_LHS_VAL:%.+]] = load double, double* [[VAR_LHS]], -// CHECK: [[VAR_RHS_VAL:%.+]] = load double, double* [[VAR_RHS]], -// CHECK: [[RES:%.+]] = fadd double [[VAR_LHS_VAL]], [[VAR_RHS_VAL]] -// CHECK: store double [[RES]], double* [[VAR_LHS]], -// CHECK: ret void -// // Shuffle and reduce function -// CHECK: define internal void [[SHUFFLE_REDUCE_FN]](i8* noundef %0, i16 noundef {{.*}}, i16 noundef {{.*}}, i16 noundef {{.*}}) -// CHECK: [[REMOTE_RED_LIST:%.+]] = alloca [[RLT]], align -// CHECK: [[REMOTE_ELT:%.+]] = alloca double -// -// CHECK: [[LANEID:%.+]] = load i16, i16* {{.+}}, align -// CHECK: [[LANEOFFSET:%.+]] = load i16, i16* {{.+}}, align -// CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align -// -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to double** -// CHECK: [[ELT:%.+]] = load double*, double** [[ELT_REF_CAST]], -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// -// CHECK: [[ELT_CAST:%.+]] = bitcast double* [[ELT]] to i64* -// CHECK: [[REMOTE_ELT_CAST:%.+]] = bitcast double* [[REMOTE_ELT]] to i64* -// CHECK: [[ELT_VAL:%.+]] = load i64, i64* [[ELT_CAST]], align -// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size() -// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 -// CHECK: [[REMOTE_ELT_VAL64:%.+]] = call i64 @__kmpc_shuffle_int64(i64 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]]) -// -// CHECK: store i64 [[REMOTE_ELT_VAL64]], i64* [[REMOTE_ELT_CAST]], align -// CHECK: [[REMOTE_ELT_VOID:%.+]] = bitcast double* [[REMOTE_ELT]] to i8* -// CHECK: store i8* [[REMOTE_ELT_VOID]], i8** [[REMOTE_ELT_REF]], align -// // Condition to reduce -// CHECK: [[CONDALG0:%.+]] = icmp eq i16 [[ALGVER]], 0 -// -// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 -// CHECK: [[COND2:%.+]] = icmp ult i16 [[LANEID]], [[LANEOFFSET]] -// CHECK: [[CONDALG1:%.+]] = and i1 [[COND1]], [[COND2]] -// -// CHECK: [[COND3:%.+]] = icmp eq i16 [[ALGVER]], 2 -// CHECK: [[COND4:%.+]] = and i16 [[LANEID]], 1 -// CHECK: [[COND5:%.+]] = icmp eq i16 [[COND4]], 0 -// CHECK: [[COND6:%.+]] = and i1 [[COND3]], [[COND5]] -// CHECK: [[COND7:%.+]] = icmp sgt i16 [[LANEOFFSET]], 0 -// CHECK: [[CONDALG2:%.+]] = and i1 [[COND6]], [[COND7]] -// -// CHECK: [[COND8:%.+]] = or i1 [[CONDALG0]], [[CONDALG1]] -// CHECK: [[SHOULD_REDUCE:%.+]] = or i1 [[COND8]], [[CONDALG2]] -// CHECK: br i1 [[SHOULD_REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] -// -// CHECK: [[DO_REDUCE]] -// CHECK: [[RED_LIST1_VOID:%.+]] = bitcast [[RLT]]* [[RED_LIST]] to i8* -// CHECK: [[RED_LIST2_VOID:%.+]] = bitcast [[RLT]]* [[REMOTE_RED_LIST]] to i8* -// CHECK: call void [[REDUCTION_FUNC]](i8* [[RED_LIST1_VOID]], i8* [[RED_LIST2_VOID]]) -// CHECK: br label {{%?}}[[REDUCE_CONT:.+]] -// -// CHECK: [[REDUCE_ELSE]] -// CHECK: br label {{%?}}[[REDUCE_CONT]] -// -// CHECK: [[REDUCE_CONT]] // Now check if we should just copy over the remote reduction list -// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 -// CHECK: [[COND2:%.+]] = icmp uge i16 [[LANEID]], [[LANEOFFSET]] -// CHECK: [[SHOULD_COPY:%.+]] = and i1 [[COND1]], [[COND2]] -// CHECK: br i1 [[SHOULD_COPY]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] -// -// CHECK: [[DO_COPY]] -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[REMOTE_ELT_REF_CAST:%.+]] = bitcast i8** [[REMOTE_ELT_REF]] to double** -// CHECK: [[REMOTE_ELT:%.+]] = load double*, double** [[REMOTE_ELT_REF_CAST]], -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to double** -// CHECK: [[ELT:%.+]] = load double*, double** [[ELT_REF_CAST]], -// CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align -// CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align -// CHECK: br label {{%?}}[[COPY_CONT:.+]] -// -// CHECK: [[COPY_ELSE]] -// CHECK: br label {{%?}}[[COPY_CONT]] -// -// CHECK: [[COPY_CONT]] -// CHECK: void -// // Inter warp copy function -// CHECK: define internal void [[WARP_COPY_FN]](i8* noundef %0, i32 noundef %1) -// CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 -// CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 -// CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* -// CHECK: store i32 0, i32* [[CNT_ADDR:%.+]], -// CHECK: br label -// CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]], -// CHECK: [[DONE_COPY:%.+]] = icmp ult i32 [[CNT]], 2 -// CHECK: br i1 [[DONE_COPY]], label -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 -// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] -// // [[DO_COPY]] -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[BASE_ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* -// CHECK: [[ELT:%.+]] = getelementptr i32, i32* [[BASE_ELT]], i32 [[CNT]] -// -// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] -// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], -// CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], -// CHECK: br label {{%?}}[[COPY_CONT:.+]] -// -// CHECK: [[COPY_ELSE]] -// CHECK: br label {{%?}}[[COPY_CONT]] -// // Barrier after copy to shared memory storage medium. -// CHECK: [[COPY_CONT]] -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* -// // Read into warp 0. -// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] -// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] -// -// CHECK: [[DO_READ]] -// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[ELT_BASE:%.+]] = bitcast i8* [[ELT_VOID]] to i32* -// CHECK: [[ELT:%.+]] = getelementptr i32, i32* [[ELT_BASE]], i32 [[CNT]] -// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i32, i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], -// CHECK: store i32 [[MEDIUM_ELT_VAL]], i32* [[ELT]], -// CHECK: br label {{%?}}[[READ_CONT:.+]] -// -// CHECK: [[READ_ELSE]] -// CHECK: br label {{%?}}[[READ_CONT]] -// -// CHECK: [[READ_CONT]] -// CHECK: [[NEXT:%.+]] = add nsw i32 [[CNT]], 1 -// CHECK: store i32 [[NEXT]], i32* [[CNT_ADDR]], -// CHECK: br label -// CHECK: ret -// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}( -// -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) -// -// // define internal void [[PFN1]]( -// CHECK: store float {{1\.[0e\+]+}}, float* [[D:%.+]], align -// CHECK: [[C_VAL:%.+]] = load i8, i8* [[C:%.+]], align -// CHECK: [[CONV:%.+]] = sext i8 [[C_VAL]] to i32 -// CHECK: [[XOR:%.+]] = xor i32 [[CONV]], 2 -// CHECK: [[TRUNC:%.+]] = trunc i32 [[XOR]] to i8 -// CHECK: store i8 [[TRUNC]], i8* [[C]], align -// CHECK: [[DV:%.+]] = load float, float* [[D]], align -// CHECK: [[MUL:%.+]] = fmul float [[DV]], {{[0-9e\.\+]+}} -// CHECK: store float [[MUL]], float* [[D]], align -// CHECK: [[PTR1:%.+]] = getelementptr inbounds [[RLT:.+]], [2 x i8*]* [[RL:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: store i8* [[C]], i8** [[PTR1]], align -// CHECK: [[PTR2:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RL]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[D_CAST:%.+]] = bitcast float* [[D]] to i8* -// CHECK: store i8* [[D_CAST]], i8** [[PTR2]], align -// CHECK: [[ARG_RL:%.+]] = bitcast [[RLT]]* [[RL]] to i8* -// CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @{{.+}}, i32 {{.+}}, i32 2, i{{32|64}} {{8|16}}, i8* [[ARG_RL]], void (i8*, i16, i16, i16)* [[SHUFFLE_REDUCE_FN:@.+]], void (i8*, i32)* [[WARP_COPY_FN:@.+]]) -// CHECK: [[CMP:%.+]] = icmp eq i32 [[RET]], 1 -// CHECK: br i1 [[CMP]], label -// CHECK: [[C_INV8:%.+]] = load i8, i8* [[C_IN:%.+]], align -// CHECK: [[C_INV:%.+]] = sext i8 [[C_INV8]] to i32 -// CHECK: [[CV8:%.+]] = load i8, i8* [[C]], align -// CHECK: [[CV:%.+]] = sext i8 [[CV8]] to i32 -// CHECK: [[XOR:%.+]] = xor i32 [[C_INV]], [[CV]] -// CHECK: [[TRUNC:%.+]] = trunc i32 [[XOR]] to i8 -// CHECK: store i8 [[TRUNC]], i8* [[C_IN]], align -// CHECK: [[D_INV:%.+]] = load float, float* [[D_IN:%.+]], align -// CHECK: [[DV:%.+]] = load float, float* [[D]], align -// CHECK: [[MUL:%.+]] = fmul float [[D_INV]], [[DV]] -// CHECK: store float [[MUL]], float* [[D_IN]], align -// CHECK: call void @__kmpc_nvptx_end_reduce_nowait( -// CHECK: br label -// -// CHECK: ret -// // Reduction function -// CHECK: define internal void [[REDUCTION_FUNC:@.+]](i8* noundef %0, i8* noundef %1) -// CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[VAR1_RHS:%.+]] = load i8*, i8** [[VAR1_RHS_REF]], -// -// CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[VAR1_LHS:%.+]] = load i8*, i8** [[VAR1_LHS_REF]], -// -// CHECK: [[VAR2_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[VAR2_RHS_VOID:%.+]] = load i8*, i8** [[VAR2_RHS_REF]], -// CHECK: [[VAR2_RHS:%.+]] = bitcast i8* [[VAR2_RHS_VOID]] to float* -// -// CHECK: [[VAR2_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[VAR2_LHS_VOID:%.+]] = load i8*, i8** [[VAR2_LHS_REF]], -// CHECK: [[VAR2_LHS:%.+]] = bitcast i8* [[VAR2_LHS_VOID]] to float* -// -// CHECK: [[VAR1_LHS_VAL8:%.+]] = load i8, i8* [[VAR1_LHS]], -// CHECK: [[VAR1_LHS_VAL:%.+]] = sext i8 [[VAR1_LHS_VAL8]] to i32 -// CHECK: [[VAR1_RHS_VAL8:%.+]] = load i8, i8* [[VAR1_RHS]], -// CHECK: [[VAR1_RHS_VAL:%.+]] = sext i8 [[VAR1_RHS_VAL8]] to i32 -// CHECK: [[XOR:%.+]] = xor i32 [[VAR1_LHS_VAL]], [[VAR1_RHS_VAL]] -// CHECK: [[RES:%.+]] = trunc i32 [[XOR]] to i8 -// CHECK: store i8 [[RES]], i8* [[VAR1_LHS]], -// -// CHECK: [[VAR2_LHS_VAL:%.+]] = load float, float* [[VAR2_LHS]], -// CHECK: [[VAR2_RHS_VAL:%.+]] = load float, float* [[VAR2_RHS]], -// CHECK: [[RES:%.+]] = fmul float [[VAR2_LHS_VAL]], [[VAR2_RHS_VAL]] -// CHECK: store float [[RES]], float* [[VAR2_LHS]], -// CHECK: ret void -// // Shuffle and reduce function -// CHECK: define internal void [[SHUFFLE_REDUCE_FN]](i8* noundef %0, i16 noundef {{.*}}, i16 noundef {{.*}}, i16 noundef {{.*}}) -// CHECK: [[REMOTE_RED_LIST:%.+]] = alloca [[RLT]], align -// CHECK: [[REMOTE_ELT1:%.+]] = alloca i8 -// CHECK: [[REMOTE_ELT2:%.+]] = alloca float -// -// CHECK: [[LANEID:%.+]] = load i16, i16* {{.+}}, align -// CHECK: [[LANEOFFSET:%.+]] = load i16, i16* {{.+}}, align -// CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align -// -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align -// -// CHECK: [[ELT_CAST:%.+]] = sext i8 [[ELT_VAL]] to i32 -// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size() -// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 -// CHECK: [[REMOTE_ELT1_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_CAST]], i16 [[LANEOFFSET]], i16 [[WS]]) -// CHECK: [[REMOTE_ELT1_VAL:%.+]] = trunc i32 [[REMOTE_ELT1_VAL32]] to i8 -// -// CHECK: store i8 [[REMOTE_ELT1_VAL]], i8* [[REMOTE_ELT1]], align -// CHECK: store i8* [[REMOTE_ELT1]], i8** [[REMOTE_ELT_REF]], align -// -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to float** -// CHECK: [[ELT:%.+]] = load float*, float** [[ELT_REF_CAST]], -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// -// CHECK: [[ELT_CAST:%.+]] = bitcast float* [[ELT]] to i32* -// CHECK: [[REMOTE_ELT2_CAST:%.+]] = bitcast float* [[REMOTE_ELT2]] to i32* -// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT_CAST]], align -// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size() -// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 -// CHECK: [[REMOTE_ELT2_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]]) -// -// CHECK: store i32 [[REMOTE_ELT2_VAL32]], i32* [[REMOTE_ELT2_CAST]], align -// CHECK: [[REMOTE_ELT2C:%.+]] = bitcast float* [[REMOTE_ELT2]] to i8* -// CHECK: store i8* [[REMOTE_ELT2C]], i8** [[REMOTE_ELT_REF]], align -// // Condition to reduce -// CHECK: [[CONDALG0:%.+]] = icmp eq i16 [[ALGVER]], 0 -// -// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 -// CHECK: [[COND2:%.+]] = icmp ult i16 [[LANEID]], [[LANEOFFSET]] -// CHECK: [[CONDALG1:%.+]] = and i1 [[COND1]], [[COND2]] -// -// CHECK: [[COND3:%.+]] = icmp eq i16 [[ALGVER]], 2 -// CHECK: [[COND4:%.+]] = and i16 [[LANEID]], 1 -// CHECK: [[COND5:%.+]] = icmp eq i16 [[COND4]], 0 -// CHECK: [[COND6:%.+]] = and i1 [[COND3]], [[COND5]] -// CHECK: [[COND7:%.+]] = icmp sgt i16 [[LANEOFFSET]], 0 -// CHECK: [[CONDALG2:%.+]] = and i1 [[COND6]], [[COND7]] -// -// CHECK: [[COND8:%.+]] = or i1 [[CONDALG0]], [[CONDALG1]] -// CHECK: [[SHOULD_REDUCE:%.+]] = or i1 [[COND8]], [[CONDALG2]] -// CHECK: br i1 [[SHOULD_REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] -// -// CHECK: [[DO_REDUCE]] -// CHECK: [[RED_LIST1_VOID:%.+]] = bitcast [[RLT]]* [[RED_LIST]] to i8* -// CHECK: [[RED_LIST2_VOID:%.+]] = bitcast [[RLT]]* [[REMOTE_RED_LIST]] to i8* -// CHECK: call void [[REDUCTION_FUNC]](i8* [[RED_LIST1_VOID]], i8* [[RED_LIST2_VOID]]) -// CHECK: br label {{%?}}[[REDUCE_CONT:.+]] -// -// CHECK: [[REDUCE_ELSE]] -// CHECK: br label {{%?}}[[REDUCE_CONT]] -// -// CHECK: [[REDUCE_CONT]] // Now check if we should just copy over the remote reduction list -// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 -// CHECK: [[COND2:%.+]] = icmp uge i16 [[LANEID]], [[LANEOFFSET]] -// CHECK: [[SHOULD_COPY:%.+]] = and i1 [[COND1]], [[COND2]] -// CHECK: br i1 [[SHOULD_COPY]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] -// -// CHECK: [[DO_COPY]] -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i8, i8* [[REMOTE_ELT_VOID]], align -// CHECK: store i8 [[REMOTE_ELT_VAL]], i8* [[ELT_VOID]], align -// -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[REMOTE_ELT_REF_CAST:%.+]] = bitcast i8** [[REMOTE_ELT_REF]] to float** -// CHECK: [[REMOTE_ELT:%.+]] = load float*, float** [[REMOTE_ELT_REF_CAST]], -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to float** -// CHECK: [[ELT:%.+]] = load float*, float** [[ELT_REF_CAST]], -// CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align -// CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align -// CHECK: br label {{%?}}[[COPY_CONT:.+]] -// -// CHECK: [[COPY_ELSE]] -// CHECK: br label {{%?}}[[COPY_CONT]] -// -// CHECK: [[COPY_CONT]] -// CHECK: void -// // Inter warp copy function -// CHECK: define internal void [[WARP_COPY_FN]](i8* noundef %0, i32 noundef %1) -// CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 -// CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 -// CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 -// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] -// // [[DO_COPY]] -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// -// CHECK: [[MEDIUM_ELT64:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] -// CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT64]] to i8 addrspace([[SHARED_ADDRSPACE]])* -// CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align -// CHECK: store volatile i8 [[ELT_VAL]], i8 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align -// CHECK: br label {{%?}}[[COPY_CONT:.+]] -// -// CHECK: [[COPY_ELSE]] -// CHECK: br label {{%?}}[[COPY_CONT]] -// // Barrier after copy to shared memory storage medium. -// CHECK: [[COPY_CONT]] -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* -// // Read into warp 0. -// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] -// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] -// -// CHECK: [[DO_READ]] -// CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] -// CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i8 addrspace([[SHARED_ADDRSPACE]])* -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i8, i8 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align -// CHECK: store i8 [[MEDIUM_ELT_VAL]], i8* [[ELT_VOID]], align -// CHECK: br label {{%?}}[[READ_CONT:.+]] -// -// CHECK: [[READ_ELSE]] -// CHECK: br label {{%?}}[[READ_CONT]] -// -// CHECK: [[READ_CONT]] -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 -// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] -// // [[DO_COPY]] -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* -// -// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] -// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align -// CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align -// CHECK: br label {{%?}}[[COPY_CONT:.+]] -// -// CHECK: [[COPY_ELSE]] -// CHECK: br label {{%?}}[[COPY_CONT]] -// // Barrier after copy to shared memory storage medium. -// CHECK: [[COPY_CONT]] -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* -// // Read into warp 0. -// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] -// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] -// -// CHECK: [[DO_READ]] -// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* -// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i32, i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align -// CHECK: store i32 [[MEDIUM_ELT_VAL]], i32* [[ELT]], align -// CHECK: br label {{%?}}[[READ_CONT:.+]] -// -// CHECK: [[READ_ELSE]] -// CHECK: br label {{%?}}[[READ_CONT]] -// -// CHECK: [[READ_CONT]] -// CHECK: ret -// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}( -// -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) -// -// // define internal void [[PFN2]]( -// CHECK: store i32 0, i32* [[A:%.+]], align -// CHECK: store i16 -32768, i16* [[B:%.+]], align -// CHECK: [[A_VAL:%.+]] = load i32, i32* [[A:%.+]], align -// CHECK: [[OR:%.+]] = or i32 [[A_VAL]], 1 -// CHECK: store i32 [[OR]], i32* [[A]], align -// CHECK: [[BV16:%.+]] = load i16, i16* [[B]], align -// CHECK: [[BV:%.+]] = sext i16 [[BV16]] to i32 -// CHECK: [[CMP:%.+]] = icmp sgt i32 99, [[BV]] -// CHECK: br i1 [[CMP]], label {{%?}}[[DO_MAX:.+]], label {{%?}}[[MAX_ELSE:.+]] -// -// CHECK: [[DO_MAX]] -// CHECK: br label {{%?}}[[MAX_CONT:.+]] -// -// CHECK: [[MAX_ELSE]] -// CHECK: [[BV:%.+]] = load i16, i16* [[B]], align -// CHECK: [[MAX:%.+]] = sext i16 [[BV]] to i32 -// CHECK: br label {{%?}}[[MAX_CONT]] -// -// CHECK: [[MAX_CONT]] -// CHECK: [[B_LVALUE:%.+]] = phi i32 [ 99, %[[DO_MAX]] ], [ [[MAX]], %[[MAX_ELSE]] ] -// CHECK: [[TRUNC:%.+]] = trunc i32 [[B_LVALUE]] to i16 -// CHECK: store i16 [[TRUNC]], i16* [[B]], align -// CHECK: [[PTR1:%.+]] = getelementptr inbounds [[RLT:.+]], [2 x i8*]* [[RL:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[A_CAST:%.+]] = bitcast i32* [[A]] to i8* -// CHECK: store i8* [[A_CAST]], i8** [[PTR1]], align -// CHECK: [[PTR2:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RL]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[B_CAST:%.+]] = bitcast i16* [[B]] to i8* -// CHECK: store i8* [[B_CAST]], i8** [[PTR2]], align -// CHECK: [[ARG_RL:%.+]] = bitcast [[RLT]]* [[RL]] to i8* -// CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @{{.+}}, i32 {{.+}}, i32 2, i{{32|64}} {{8|16}}, i8* [[ARG_RL]], void (i8*, i16, i16, i16)* [[SHUFFLE_REDUCE_FN:@.+]], void (i8*, i32)* [[WARP_COPY_FN:@.+]]) -// CHECK: [[CMP:%.+]] = icmp eq i32 [[RET]], 1 -// CHECK: br i1 [[CMP]], label -// CHECK: [[A_INV:%.+]] = load i32, i32* [[A_IN:%.+]], align -// CHECK: [[AV:%.+]] = load i32, i32* [[A]], align -// CHECK: [[OR:%.+]] = or i32 [[A_INV]], [[AV]] -// CHECK: store i32 [[OR]], i32* [[A_IN]], align -// CHECK: [[B_INV16:%.+]] = load i16, i16* [[B_IN:%.+]], align -// CHECK: [[B_INV:%.+]] = sext i16 [[B_INV16]] to i32 -// CHECK: [[BV16:%.+]] = load i16, i16* [[B]], align -// CHECK: [[BV:%.+]] = sext i16 [[BV16]] to i32 -// CHECK: [[CMP:%.+]] = icmp sgt i32 [[B_INV]], [[BV]] -// CHECK: br i1 [[CMP]], label {{%?}}[[DO_MAX:.+]], label {{%?}}[[MAX_ELSE:.+]] -// -// CHECK: [[DO_MAX]] -// CHECK: [[MAX1:%.+]] = load i16, i16* [[B_IN]], align -// CHECK: br label {{%?}}[[MAX_CONT:.+]] -// -// CHECK: [[MAX_ELSE]] -// CHECK: [[MAX2:%.+]] = load i16, i16* [[B]], align -// CHECK: br label {{%?}}[[MAX_CONT]] -// -// CHECK: [[MAX_CONT]] -// CHECK: [[B_MAX:%.+]] = phi i16 [ [[MAX1]], %[[DO_MAX]] ], [ [[MAX2]], %[[MAX_ELSE]] ] -// CHECK: store i16 [[B_MAX]], i16* [[B_IN]], align -// CHECK: call void @__kmpc_nvptx_end_reduce_nowait( -// CHECK: br label -// -// CHECK: ret -// // Reduction function -// CHECK: define internal void [[REDUCTION_FUNC:@.+]](i8* noundef %0, i8* noundef %1) -// CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[VAR1_RHS_VOID:%.+]] = load i8*, i8** [[VAR1_RHS_REF]], -// CHECK: [[VAR1_RHS:%.+]] = bitcast i8* [[VAR1_RHS_VOID]] to i32* -// -// CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[VAR1_LHS_VOID:%.+]] = load i8*, i8** [[VAR1_LHS_REF]], -// CHECK: [[VAR1_LHS:%.+]] = bitcast i8* [[VAR1_LHS_VOID]] to i32* -// -// CHECK: [[VAR2_RHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_RHS]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[VAR2_RHS_VOID:%.+]] = load i8*, i8** [[VAR2_RHS_REF]], -// CHECK: [[VAR2_RHS:%.+]] = bitcast i8* [[VAR2_RHS_VOID]] to i16* -// -// CHECK: [[VAR2_LHS_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST_LHS]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[VAR2_LHS_VOID:%.+]] = load i8*, i8** [[VAR2_LHS_REF]], -// CHECK: [[VAR2_LHS:%.+]] = bitcast i8* [[VAR2_LHS_VOID]] to i16* -// -// CHECK: [[VAR1_LHS_VAL:%.+]] = load i32, i32* [[VAR1_LHS]], -// CHECK: [[VAR1_RHS_VAL:%.+]] = load i32, i32* [[VAR1_RHS]], -// CHECK: [[OR:%.+]] = or i32 [[VAR1_LHS_VAL]], [[VAR1_RHS_VAL]] -// CHECK: store i32 [[OR]], i32* [[VAR1_LHS]], -// -// CHECK: [[VAR2_LHS_VAL16:%.+]] = load i16, i16* [[VAR2_LHS]], -// CHECK: [[VAR2_LHS_VAL:%.+]] = sext i16 [[VAR2_LHS_VAL16]] to i32 -// CHECK: [[VAR2_RHS_VAL16:%.+]] = load i16, i16* [[VAR2_RHS]], -// CHECK: [[VAR2_RHS_VAL:%.+]] = sext i16 [[VAR2_RHS_VAL16]] to i32 -// -// CHECK: [[CMP:%.+]] = icmp sgt i32 [[VAR2_LHS_VAL]], [[VAR2_RHS_VAL]] -// CHECK: br i1 [[CMP]], label {{%?}}[[DO_MAX:.+]], label {{%?}}[[MAX_ELSE:.+]] -// -// CHECK: [[DO_MAX]] -// CHECK: [[MAX1:%.+]] = load i16, i16* [[VAR2_LHS]], align -// CHECK: br label {{%?}}[[MAX_CONT:.+]] -// -// CHECK: [[MAX_ELSE]] -// CHECK: [[MAX2:%.+]] = load i16, i16* [[VAR2_RHS]], align -// CHECK: br label {{%?}}[[MAX_CONT]] -// -// CHECK: [[MAX_CONT]] -// CHECK: [[MAXV:%.+]] = phi i16 [ [[MAX1]], %[[DO_MAX]] ], [ [[MAX2]], %[[MAX_ELSE]] ] -// CHECK: store i16 [[MAXV]], i16* [[VAR2_LHS]], -// CHECK: ret void -// // Shuffle and reduce function -// CHECK: define internal void [[SHUFFLE_REDUCE_FN]](i8* noundef %0, i16 noundef {{.*}}, i16 noundef {{.*}}, i16 noundef {{.*}}) -// CHECK: [[REMOTE_RED_LIST:%.+]] = alloca [[RLT]], align -// CHECK: [[REMOTE_ELT1:%.+]] = alloca i32 -// CHECK: [[REMOTE_ELT2:%.+]] = alloca i16 -// -// CHECK: [[LANEID:%.+]] = load i16, i16* {{.+}}, align -// CHECK: [[LANEOFFSET:%.+]] = load i16, i16* {{.+}}, align -// CHECK: [[ALGVER:%.+]] = load i16, i16* {{.+}}, align -// -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to i32** -// CHECK: [[ELT:%.+]] = load i32*, i32** [[ELT_REF_CAST]], -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align -// -// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size() -// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 -// CHECK: [[REMOTE_ELT1_VAL:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]]) -// -// CHECK: store i32 [[REMOTE_ELT1_VAL]], i32* [[REMOTE_ELT1]], align -// CHECK: [[REMOTE_ELT1C:%.+]] = bitcast i32* [[REMOTE_ELT1]] to i8* -// CHECK: store i8* [[REMOTE_ELT1C]], i8** [[REMOTE_ELT_REF]], align -// -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to i16** -// CHECK: [[ELT:%.+]] = load i16*, i16** [[ELT_REF_CAST]], -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align -// -// CHECK: [[ELT_CAST:%.+]] = sext i16 [[ELT_VAL]] to i32 -// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size() -// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16 -// CHECK: [[REMOTE_ELT2_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_CAST]], i16 [[LANEOFFSET]], i16 [[WS]]) -// CHECK: [[REMOTE_ELT2_VAL:%.+]] = trunc i32 [[REMOTE_ELT2_VAL32]] to i16 -// -// CHECK: store i16 [[REMOTE_ELT2_VAL]], i16* [[REMOTE_ELT2]], align -// CHECK: [[REMOTE_ELT2C:%.+]] = bitcast i16* [[REMOTE_ELT2]] to i8* -// CHECK: store i8* [[REMOTE_ELT2C]], i8** [[REMOTE_ELT_REF]], align -// // Condition to reduce -// CHECK: [[CONDALG0:%.+]] = icmp eq i16 [[ALGVER]], 0 -// -// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 -// CHECK: [[COND2:%.+]] = icmp ult i16 [[LANEID]], [[LANEOFFSET]] -// CHECK: [[CONDALG1:%.+]] = and i1 [[COND1]], [[COND2]] -// -// CHECK: [[COND3:%.+]] = icmp eq i16 [[ALGVER]], 2 -// CHECK: [[COND4:%.+]] = and i16 [[LANEID]], 1 -// CHECK: [[COND5:%.+]] = icmp eq i16 [[COND4]], 0 -// CHECK: [[COND6:%.+]] = and i1 [[COND3]], [[COND5]] -// CHECK: [[COND7:%.+]] = icmp sgt i16 [[LANEOFFSET]], 0 -// CHECK: [[CONDALG2:%.+]] = and i1 [[COND6]], [[COND7]] -// -// CHECK: [[COND8:%.+]] = or i1 [[CONDALG0]], [[CONDALG1]] -// CHECK: [[SHOULD_REDUCE:%.+]] = or i1 [[COND8]], [[CONDALG2]] -// CHECK: br i1 [[SHOULD_REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]] -// -// CHECK: [[DO_REDUCE]] -// CHECK: [[RED_LIST1_VOID:%.+]] = bitcast [[RLT]]* [[RED_LIST]] to i8* -// CHECK: [[RED_LIST2_VOID:%.+]] = bitcast [[RLT]]* [[REMOTE_RED_LIST]] to i8* -// CHECK: call void [[REDUCTION_FUNC]](i8* [[RED_LIST1_VOID]], i8* [[RED_LIST2_VOID]]) -// CHECK: br label {{%?}}[[REDUCE_CONT:.+]] -// -// CHECK: [[REDUCE_ELSE]] -// CHECK: br label {{%?}}[[REDUCE_CONT]] -// -// CHECK: [[REDUCE_CONT]] // Now check if we should just copy over the remote reduction list -// CHECK: [[COND1:%.+]] = icmp eq i16 [[ALGVER]], 1 -// CHECK: [[COND2:%.+]] = icmp uge i16 [[LANEID]], [[LANEOFFSET]] -// CHECK: [[SHOULD_COPY:%.+]] = and i1 [[COND1]], [[COND2]] -// CHECK: br i1 [[SHOULD_COPY]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] -// -// CHECK: [[DO_COPY]] -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[REMOTE_ELT_REF_CAST:%.+]] = bitcast i8** [[REMOTE_ELT_REF]] to i32** -// CHECK: [[REMOTE_ELT:%.+]] = load i32*, i32** [[REMOTE_ELT_REF_CAST]], -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to i32** -// CHECK: [[ELT:%.+]] = load i32*, i32** [[ELT_REF_CAST]], -// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align -// CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align -// -// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[REMOTE_ELT_REF_CAST:%.+]] = bitcast i8** [[REMOTE_ELT_REF]] to i16** -// CHECK: [[REMOTE_ELT:%.+]] = load i16*, i16** [[REMOTE_ELT_REF_CAST]], -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_REF_CAST:%.+]] = bitcast i8** [[ELT_REF]] to i16** -// CHECK: [[ELT:%.+]] = load i16*, i16** [[ELT_REF_CAST]], -// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align -// CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align -// CHECK: br label {{%?}}[[COPY_CONT:.+]] -// -// CHECK: [[COPY_ELSE]] -// CHECK: br label {{%?}}[[COPY_CONT]] -// -// CHECK: [[COPY_CONT]] -// CHECK: void -// // Inter warp copy function -// CHECK: define internal void [[WARP_COPY_FN]](i8* noundef %0, i32 noundef %1) -// CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 -// CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 -// CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 -// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] -// // [[DO_COPY]] -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* -// -// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] -// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align -// CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align -// CHECK: br label {{%?}}[[COPY_CONT:.+]] -// -// CHECK: [[COPY_ELSE]] -// CHECK: br label {{%?}}[[COPY_CONT]] -// // Barrier after copy to shared memory storage medium. -// CHECK: [[COPY_CONT]] -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* -// // Read into warp 0. -// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] -// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] -// -// CHECK: [[DO_READ]] -// CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* -// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i32, i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align -// CHECK: store i32 [[MEDIUM_ELT_VAL]], i32* [[ELT]], align -// CHECK: br label {{%?}}[[READ_CONT:.+]] -// -// CHECK: [[READ_ELSE]] -// CHECK: br label {{%?}}[[READ_CONT]] -// -// CHECK: [[READ_CONT]] -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 -// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] -// // [[DO_COPY]] -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* -// -// CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] -// CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])* -// CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align -// CHECK: store volatile i16 [[ELT_VAL]], i16 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align -// CHECK: br label {{%?}}[[COPY_CONT:.+]] -// -// CHECK: [[COPY_ELSE]] -// CHECK: br label {{%?}}[[COPY_CONT]] -// // Barrier after copy to shared memory storage medium. -// CHECK: [[COPY_CONT]] -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @ -// CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32* -// // Read into warp 0. -// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]] -// CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] -// -// CHECK: [[DO_READ]] -// CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] -// CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])* -// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 1 -// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], -// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* -// CHECK: [[MEDIUM_ELT_VAL:%.+]] = load volatile i16, i16 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align -// CHECK: store i16 [[MEDIUM_ELT_VAL]], i16* [[ELT]], align -// CHECK: br label {{%?}}[[READ_CONT:.+]] -// -// CHECK: [[READ_ELSE]] -// CHECK: br label {{%?}}[[READ_CONT]] -// -// CHECK: [[READ_CONT]] -// CHECK: ret #endif +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24 +// CHECK-64-SAME: (double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK-64-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* +// CHECK-64-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 +// CHECK-64-NEXT: store double 0.000000e+00, double* [[E1]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load double, double* [[E1]], align 8 +// CHECK-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 +// CHECK-64-NEXT: store double [[ADD]], double* [[E1]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast double* [[E1]] to i8* +// CHECK-64-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-64-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i64 8, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-64-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +// CHECK-64-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-64: .omp.reduction.then: +// CHECK-64-NEXT: [[TMP9:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load double, double* [[E1]], align 8 +// CHECK-64-NEXT: [[ADD2:%.*]] = fadd double [[TMP9]], [[TMP10]] +// CHECK-64-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK-64: .omp.reduction.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 +// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK-64-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK-64-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* +// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** +// CHECK-64-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i64 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* +// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK-64-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK-64-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-64-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 +// CHECK-64-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) +// CHECK-64-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 +// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK-64-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] +// CHECK-64-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK-64-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 +// CHECK-64-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] +// CHECK-64-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK-64-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK-64-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] +// CHECK-64-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] +// CHECK-64-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-64: then: +// CHECK-64-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK-64-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK-64-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR3:[0-9]+]] +// CHECK-64-NEXT: br label [[IFCONT:%.*]] +// CHECK-64: else: +// CHECK-64-NEXT: br label [[IFCONT]] +// CHECK-64: ifcont: +// CHECK-64-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +// CHECK-64-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK-64: then4: +// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** +// CHECK-64-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 8 +// CHECK-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** +// CHECK-64-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 8 +// CHECK-64-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 +// CHECK-64-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 +// CHECK-64-NEXT: br label [[IFCONT6:%.*]] +// CHECK-64: else5: +// CHECK-64-NEXT: br label [[IFCONT6]] +// CHECK-64: ifcont6: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* +// CHECK-64-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-64-NEXT: br label [[PRECOND:%.*]] +// CHECK-64: precond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 +// CHECK-64-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK-64: body: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-64-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-64: then: +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK-64-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 +// CHECK-64-NEXT: br label [[IFCONT:%.*]] +// CHECK-64: else: +// CHECK-64-NEXT: br label [[IFCONT]] +// CHECK-64: ifcont: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK-64: then2: +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4 +// CHECK-64-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK-64-NEXT: br label [[IFCONT4:%.*]] +// CHECK-64: else3: +// CHECK-64-NEXT: br label [[IFCONT4]] +// CHECK-64: ifcont4: +// CHECK-64-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK-64-NEXT: br label [[PRECOND]] +// CHECK-64: exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 +// CHECK-64-SAME: (i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 +// CHECK-64-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[TMP4]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP1]] to i8* +// CHECK-64-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP7]], i64 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK-64-NEXT: [[C1:%.*]] = alloca i8, align 1 +// CHECK-64-NEXT: [[D2:%.*]] = alloca float, align 4 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 +// CHECK-64-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK-64-NEXT: store i8 0, i8* [[C1]], align 1 +// CHECK-64-NEXT: store float 1.000000e+00, float* [[D2]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-64-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK-64-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 +// CHECK-64-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK-64-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 +// CHECK-64-NEXT: [[TMP3:%.*]] = load float, float* [[D2]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 +// CHECK-64-NEXT: store float [[MUL]], float* [[D2]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* [[C1]], i8** [[TMP6]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast float* [[D2]] to i8* +// CHECK-64-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-64-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 2, i64 16, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) +// CHECK-64-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-64: .omp.reduction.then: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK-64-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-64-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK-64-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] +// CHECK-64-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 +// CHECK-64-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load float, float* [[TMP1]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load float, float* [[D2]], align 4 +// CHECK-64-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] +// CHECK-64-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK-64: .omp.reduction.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 +// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 +// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK-64-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK-64-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK-64-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-64-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 +// CHECK-64-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 +// CHECK-64-NEXT: store i8 [[TMP18]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK-64-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to float** +// CHECK-64-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP22]], align 8 +// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP23]], i64 1 +// CHECK-64-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to i8* +// CHECK-64-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP23]] to i32* +// CHECK-64-NEXT: [[TMP28:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-64-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK-64-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) +// CHECK-64-NEXT: store i32 [[TMP32]], i32* [[TMP28]], align 4 +// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP27]], i64 1 +// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP28]], i64 1 +// CHECK-64-NEXT: [[TMP35:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK-64-NEXT: store i8* [[TMP35]], i8** [[TMP24]], align 8 +// CHECK-64-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK-64-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK-64-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK-64-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0 +// CHECK-64-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]] +// CHECK-64-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK-64-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]] +// CHECK-64-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]] +// CHECK-64-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] +// CHECK-64-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-64: then: +// CHECK-64-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK-64-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK-64-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP48]], i8* [[TMP49]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[IFCONT:%.*]] +// CHECK-64: else: +// CHECK-64-NEXT: br label [[IFCONT]] +// CHECK-64: ifcont: +// CHECK-64-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] +// CHECK-64-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK-64: then5: +// CHECK-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 8 +// CHECK-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP56:%.*]] = load i8*, i8** [[TMP55]], align 8 +// CHECK-64-NEXT: [[TMP57:%.*]] = load i8, i8* [[TMP54]], align 1 +// CHECK-64-NEXT: store i8 [[TMP57]], i8* [[TMP56]], align 1 +// CHECK-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to float** +// CHECK-64-NEXT: [[TMP60:%.*]] = load float*, float** [[TMP59]], align 8 +// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to float** +// CHECK-64-NEXT: [[TMP63:%.*]] = load float*, float** [[TMP62]], align 8 +// CHECK-64-NEXT: [[TMP64:%.*]] = load float, float* [[TMP60]], align 4 +// CHECK-64-NEXT: store float [[TMP64]], float* [[TMP63]], align 4 +// CHECK-64-NEXT: br label [[IFCONT7:%.*]] +// CHECK-64: else6: +// CHECK-64-NEXT: br label [[IFCONT7]] +// CHECK-64: ifcont7: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 +// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-64-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-64: then: +// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast i32 addrspace(3)* [[TMP10]] to i8 addrspace(3)* +// CHECK-64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP9]], align 1 +// CHECK-64-NEXT: store volatile i8 [[TMP12]], i8 addrspace(3)* [[TMP11]], align 1 +// CHECK-64-NEXT: br label [[IFCONT:%.*]] +// CHECK-64: else: +// CHECK-64-NEXT: br label [[IFCONT]] +// CHECK-64: ifcont: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK-64: then2: +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast i32 addrspace(3)* [[TMP14]] to i8 addrspace(3)* +// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP18:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP15]], align 1 +// CHECK-64-NEXT: store i8 [[TMP18]], i8* [[TMP17]], align 1 +// CHECK-64-NEXT: br label [[IFCONT4:%.*]] +// CHECK-64: else3: +// CHECK-64-NEXT: br label [[IFCONT4]] +// CHECK-64: ifcont4: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-64-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK-64: then6: +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i32* +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK-64-NEXT: store volatile i32 [[TMP23]], i32 addrspace(3)* [[TMP22]], align 4 +// CHECK-64-NEXT: br label [[IFCONT8:%.*]] +// CHECK-64: else7: +// CHECK-64-NEXT: br label [[IFCONT8]] +// CHECK-64: ifcont8: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] +// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] +// CHECK-64: then10: +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i8*, i8** [[TMP26]], align 8 +// CHECK-64-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to i32* +// CHECK-64-NEXT: [[TMP29:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP25]], align 4 +// CHECK-64-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK-64-NEXT: br label [[IFCONT12:%.*]] +// CHECK-64: else11: +// CHECK-64-NEXT: br label [[IFCONT12]] +// CHECK-64: ifcont12: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35 +// CHECK-64-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* +// CHECK-64-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP1]] to i8* +// CHECK-64-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP8]], i64 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-64-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK-64-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-64-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 +// CHECK-64-NEXT: store i32 [[OR]], i32* [[A1]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 +// CHECK-64-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK-64-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-64-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2, i64 16, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) +// CHECK-64-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-64: .omp.reduction.then: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-64-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK-64-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-64-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-64-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK-64-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] +// CHECK-64-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK-64: cond.true9: +// CHECK-64-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-64-NEXT: br label [[COND_END11:%.*]] +// CHECK-64: cond.false10: +// CHECK-64-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-64-NEXT: br label [[COND_END11]] +// CHECK-64: cond.end11: +// CHECK-64-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK-64-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK-64: .omp.reduction.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7 +// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK-64-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK-64-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i64 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-64-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 +// CHECK-64-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) +// CHECK-64-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i64 1 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK-64-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** +// CHECK-64-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 +// CHECK-64-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* +// CHECK-64-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK-64-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 +// CHECK-64-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-64-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK-64-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) +// CHECK-64-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 +// CHECK-64-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 +// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK-64-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK-64-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 8 +// CHECK-64-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK-64-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +// CHECK-64-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK-64-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 +// CHECK-64-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] +// CHECK-64-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK-64-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] +// CHECK-64-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] +// CHECK-64-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] +// CHECK-64-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-64: then: +// CHECK-64-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK-64-NEXT: [[TMP50:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK-64-NEXT: call void @"_omp$reduction$reduction_func6"(i8* [[TMP49]], i8* [[TMP50]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[IFCONT:%.*]] +// CHECK-64: else: +// CHECK-64-NEXT: br label [[IFCONT]] +// CHECK-64: ifcont: +// CHECK-64-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] +// CHECK-64-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK-64: then5: +// CHECK-64-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** +// CHECK-64-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 8 +// CHECK-64-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** +// CHECK-64-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 8 +// CHECK-64-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK-64-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 +// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** +// CHECK-64-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 8 +// CHECK-64-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** +// CHECK-64-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 8 +// CHECK-64-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK-64-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 +// CHECK-64-NEXT: br label [[IFCONT7:%.*]] +// CHECK-64: else6: +// CHECK-64-NEXT: br label [[IFCONT7]] +// CHECK-64: ifcont7: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8 +// CHECK-64-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-64-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-64-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-64: then: +// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK-64-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK-64-NEXT: br label [[IFCONT:%.*]] +// CHECK-64: else: +// CHECK-64-NEXT: br label [[IFCONT]] +// CHECK-64: ifcont: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK-64: then2: +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK-64-NEXT: [[TMP18:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 +// CHECK-64-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK-64-NEXT: br label [[IFCONT4:%.*]] +// CHECK-64: else3: +// CHECK-64-NEXT: br label [[IFCONT4]] +// CHECK-64: ifcont4: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-64-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK-64: then6: +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i16* +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast i32 addrspace(3)* [[TMP22]] to i16 addrspace(3)* +// CHECK-64-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP21]], align 2 +// CHECK-64-NEXT: store volatile i16 [[TMP24]], i16 addrspace(3)* [[TMP23]], align 2 +// CHECK-64-NEXT: br label [[IFCONT8:%.*]] +// CHECK-64: else7: +// CHECK-64-NEXT: br label [[IFCONT8]] +// CHECK-64: ifcont8: +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-64-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] +// CHECK-64: then10: +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-64-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(3)* [[TMP26]] to i16 addrspace(3)* +// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP29:%.*]] = load i8*, i8** [[TMP28]], align 8 +// CHECK-64-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i16* +// CHECK-64-NEXT: [[TMP31:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP27]], align 2 +// CHECK-64-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 2 +// CHECK-64-NEXT: br label [[IFCONT12:%.*]] +// CHECK-64: else11: +// CHECK-64-NEXT: br label [[IFCONT12]] +// CHECK-64: ifcont12: +// CHECK-64-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24 +// CHECK-32-SAME: (double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* +// CHECK-32-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK-32-NEXT: store double 0.000000e+00, double* [[E1]], align 8 +// CHECK-32-NEXT: [[TMP1:%.*]] = load double, double* [[E1]], align 8 +// CHECK-32-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 +// CHECK-32-NEXT: store double [[ADD]], double* [[E1]], align 8 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast double* [[E1]] to i8* +// CHECK-32-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 4, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +// CHECK-32-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32: .omp.reduction.then: +// CHECK-32-NEXT: [[TMP9:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK-32-NEXT: [[TMP10:%.*]] = load double, double* [[E1]], align 8 +// CHECK-32-NEXT: [[ADD2:%.*]] = fadd double [[TMP9]], [[TMP10]] +// CHECK-32-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK-32: .omp.reduction.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 +// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK-32-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK-32-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* +// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** +// CHECK-32-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i32 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* +// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK-32-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK-32-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 +// CHECK-32-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) +// CHECK-32-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 8 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 +// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK-32-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK-32-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK-32-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 +// CHECK-32-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] +// CHECK-32-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK-32-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK-32-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] +// CHECK-32-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] +// CHECK-32-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32: then: +// CHECK-32-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK-32-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK-32-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR3:[0-9]+]] +// CHECK-32-NEXT: br label [[IFCONT:%.*]] +// CHECK-32: else: +// CHECK-32-NEXT: br label [[IFCONT]] +// CHECK-32: ifcont: +// CHECK-32-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +// CHECK-32-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK-32: then4: +// CHECK-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** +// CHECK-32-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 4 +// CHECK-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** +// CHECK-32-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 4 +// CHECK-32-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 +// CHECK-32-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 +// CHECK-32-NEXT: br label [[IFCONT6:%.*]] +// CHECK-32: else5: +// CHECK-32-NEXT: br label [[IFCONT6]] +// CHECK-32: ifcont6: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* +// CHECK-32-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-NEXT: br label [[PRECOND:%.*]] +// CHECK-32: precond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 +// CHECK-32-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK-32: body: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32: then: +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK-32-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 +// CHECK-32-NEXT: br label [[IFCONT:%.*]] +// CHECK-32: else: +// CHECK-32-NEXT: br label [[IFCONT]] +// CHECK-32: ifcont: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK-32: then2: +// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4 +// CHECK-32-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK-32-NEXT: br label [[IFCONT4:%.*]] +// CHECK-32: else3: +// CHECK-32-NEXT: br label [[IFCONT4]] +// CHECK-32: ifcont4: +// CHECK-32-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-NEXT: br label [[PRECOND]] +// CHECK-32: exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 +// CHECK-32-SAME: (i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 +// CHECK-32-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP1]] to i8* +// CHECK-32-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP7]], i32 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK-32-NEXT: [[C1:%.*]] = alloca i8, align 1 +// CHECK-32-NEXT: [[D2:%.*]] = alloca float, align 4 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 +// CHECK-32-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 +// CHECK-32-NEXT: store i8 0, i8* [[C1]], align 1 +// CHECK-32-NEXT: store float 1.000000e+00, float* [[D2]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK-32-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 +// CHECK-32-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK-32-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 +// CHECK-32-NEXT: [[TMP3:%.*]] = load float, float* [[D2]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 +// CHECK-32-NEXT: store float [[MUL]], float* [[D2]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast float* [[D2]] to i8* +// CHECK-32-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 2, i32 8, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) +// CHECK-32-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32: .omp.reduction.then: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK-32-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-32-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK-32-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] +// CHECK-32-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 +// CHECK-32-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = load float, float* [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load float, float* [[D2]], align 4 +// CHECK-32-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK-32: .omp.reduction.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 +// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 +// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK-32-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK-32-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK-32-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 +// CHECK-32-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK-32-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 +// CHECK-32-NEXT: store i8 [[TMP18]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to float** +// CHECK-32-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP22]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP23]], i32 1 +// CHECK-32-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to i8* +// CHECK-32-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP23]] to i32* +// CHECK-32-NEXT: [[TMP28:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK-32-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK-32-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) +// CHECK-32-NEXT: store i32 [[TMP32]], i32* [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP28]], i32 1 +// CHECK-32-NEXT: [[TMP35:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK-32-NEXT: store i8* [[TMP35]], i8** [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK-32-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK-32-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK-32-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0 +// CHECK-32-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]] +// CHECK-32-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK-32-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]] +// CHECK-32-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]] +// CHECK-32-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] +// CHECK-32-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32: then: +// CHECK-32-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK-32-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK-32-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP48]], i8* [[TMP49]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[IFCONT:%.*]] +// CHECK-32: else: +// CHECK-32-NEXT: br label [[IFCONT]] +// CHECK-32: ifcont: +// CHECK-32-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] +// CHECK-32-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK-32: then5: +// CHECK-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 +// CHECK-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP56:%.*]] = load i8*, i8** [[TMP55]], align 4 +// CHECK-32-NEXT: [[TMP57:%.*]] = load i8, i8* [[TMP54]], align 1 +// CHECK-32-NEXT: store i8 [[TMP57]], i8* [[TMP56]], align 1 +// CHECK-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to float** +// CHECK-32-NEXT: [[TMP60:%.*]] = load float*, float** [[TMP59]], align 4 +// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to float** +// CHECK-32-NEXT: [[TMP63:%.*]] = load float*, float** [[TMP62]], align 4 +// CHECK-32-NEXT: [[TMP64:%.*]] = load float, float* [[TMP60]], align 4 +// CHECK-32-NEXT: store float [[TMP64]], float* [[TMP63]], align 4 +// CHECK-32-NEXT: br label [[IFCONT7:%.*]] +// CHECK-32: else6: +// CHECK-32-NEXT: br label [[IFCONT7]] +// CHECK-32: ifcont7: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 +// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32: then: +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast i32 addrspace(3)* [[TMP10]] to i8 addrspace(3)* +// CHECK-32-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP9]], align 1 +// CHECK-32-NEXT: store volatile i8 [[TMP12]], i8 addrspace(3)* [[TMP11]], align 1 +// CHECK-32-NEXT: br label [[IFCONT:%.*]] +// CHECK-32: else: +// CHECK-32-NEXT: br label [[IFCONT]] +// CHECK-32: ifcont: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK-32: then2: +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i32 addrspace(3)* [[TMP14]] to i8 addrspace(3)* +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP15]], align 1 +// CHECK-32-NEXT: store i8 [[TMP18]], i8* [[TMP17]], align 1 +// CHECK-32-NEXT: br label [[IFCONT4:%.*]] +// CHECK-32: else3: +// CHECK-32-NEXT: br label [[IFCONT4]] +// CHECK-32: ifcont4: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK-32: then6: +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i32* +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK-32-NEXT: store volatile i32 [[TMP23]], i32 addrspace(3)* [[TMP22]], align 4 +// CHECK-32-NEXT: br label [[IFCONT8:%.*]] +// CHECK-32: else7: +// CHECK-32-NEXT: br label [[IFCONT8]] +// CHECK-32: ifcont8: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] +// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] +// CHECK-32: then10: +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP27:%.*]] = load i8*, i8** [[TMP26]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to i32* +// CHECK-32-NEXT: [[TMP29:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP25]], align 4 +// CHECK-32-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK-32-NEXT: br label [[IFCONT12:%.*]] +// CHECK-32: else11: +// CHECK-32-NEXT: br label [[IFCONT12]] +// CHECK-32: ifcont12: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35 +// CHECK-32-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* +// CHECK-32-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP1]] to i8* +// CHECK-32-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP8]], i32 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK-32-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK-32-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 +// CHECK-32-NEXT: store i32 [[OR]], i32* [[A1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 +// CHECK-32-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK-32-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) +// CHECK-32-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32: .omp.reduction.then: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-32-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK-32-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] +// CHECK-32-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK-32: cond.true9: +// CHECK-32-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-32-NEXT: br label [[COND_END11:%.*]] +// CHECK-32: cond.false10: +// CHECK-32-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-NEXT: br label [[COND_END11]] +// CHECK-32: cond.end11: +// CHECK-32-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK-32-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK-32: .omp.reduction.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7 +// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK-32-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK-32-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 +// CHECK-32-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) +// CHECK-32-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK-32-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** +// CHECK-32-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK-32-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* +// CHECK-32-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK-32-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 +// CHECK-32-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK-32-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) +// CHECK-32-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 +// CHECK-32-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK-32-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK-32-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 4 +// CHECK-32-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK-32-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +// CHECK-32-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK-32-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 +// CHECK-32-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] +// CHECK-32-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK-32-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] +// CHECK-32-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] +// CHECK-32-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] +// CHECK-32-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32: then: +// CHECK-32-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK-32-NEXT: [[TMP50:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK-32-NEXT: call void @"_omp$reduction$reduction_func6"(i8* [[TMP49]], i8* [[TMP50]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[IFCONT:%.*]] +// CHECK-32: else: +// CHECK-32-NEXT: br label [[IFCONT]] +// CHECK-32: ifcont: +// CHECK-32-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] +// CHECK-32-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK-32: then5: +// CHECK-32-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** +// CHECK-32-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 4 +// CHECK-32-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** +// CHECK-32-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 4 +// CHECK-32-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK-32-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 +// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** +// CHECK-32-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 4 +// CHECK-32-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** +// CHECK-32-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 4 +// CHECK-32-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK-32-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 +// CHECK-32-NEXT: br label [[IFCONT7:%.*]] +// CHECK-32: else6: +// CHECK-32-NEXT: br label [[IFCONT7]] +// CHECK-32: ifcont7: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8 +// CHECK-32-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32: then: +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK-32-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK-32-NEXT: br label [[IFCONT:%.*]] +// CHECK-32: else: +// CHECK-32-NEXT: br label [[IFCONT]] +// CHECK-32: ifcont: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK-32: then2: +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK-32-NEXT: [[TMP18:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 +// CHECK-32-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK-32-NEXT: br label [[IFCONT4:%.*]] +// CHECK-32: else3: +// CHECK-32-NEXT: br label [[IFCONT4]] +// CHECK-32: ifcont4: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK-32: then6: +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i16* +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast i32 addrspace(3)* [[TMP22]] to i16 addrspace(3)* +// CHECK-32-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP21]], align 2 +// CHECK-32-NEXT: store volatile i16 [[TMP24]], i16 addrspace(3)* [[TMP23]], align 2 +// CHECK-32-NEXT: br label [[IFCONT8:%.*]] +// CHECK-32: else7: +// CHECK-32-NEXT: br label [[IFCONT8]] +// CHECK-32: ifcont8: +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] +// CHECK-32: then10: +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(3)* [[TMP26]] to i16 addrspace(3)* +// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP29:%.*]] = load i8*, i8** [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i16* +// CHECK-32-NEXT: [[TMP31:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP27]], align 2 +// CHECK-32-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 2 +// CHECK-32-NEXT: br label [[IFCONT12:%.*]] +// CHECK-32: else11: +// CHECK-32-NEXT: br label [[IFCONT12]] +// CHECK-32: ifcont12: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24 +// CHECK-32-EX-SAME: (double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-EX-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-EX-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK-32-EX-NEXT: store double 0.000000e+00, double* [[E1]], align 8 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load double, double* [[E1]], align 8 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 +// CHECK-32-EX-NEXT: store double [[ADD]], double* [[E1]], align 8 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast double* [[E1]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 4, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32-EX: .omp.reduction.then: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load double, double* [[E1]], align 8 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = fadd double [[TMP9]], [[TMP10]] +// CHECK-32-EX-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK-32-EX: .omp.reduction.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 +// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to double** +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP11]], i32 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP11]] to i64* +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) +// CHECK-32-EX-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 8 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] +// CHECK-32-EX-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK-32-EX-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK-32-EX-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] +// CHECK-32-EX-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] +// CHECK-32-EX-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32-EX: then: +// CHECK-32-EX-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK-32-EX-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK-32-EX-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR3:[0-9]+]] +// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] +// CHECK-32-EX: else: +// CHECK-32-EX-NEXT: br label [[IFCONT]] +// CHECK-32-EX: ifcont: +// CHECK-32-EX-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +// CHECK-32-EX-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK-32-EX: then4: +// CHECK-32-EX-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** +// CHECK-32-EX-NEXT: [[TMP43:%.*]] = load double*, double** [[TMP42]], align 4 +// CHECK-32-EX-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double** +// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load double*, double** [[TMP45]], align 4 +// CHECK-32-EX-NEXT: [[TMP47:%.*]] = load double, double* [[TMP43]], align 8 +// CHECK-32-EX-NEXT: store double [[TMP47]], double* [[TMP46]], align 8 +// CHECK-32-EX-NEXT: br label [[IFCONT6:%.*]] +// CHECK-32-EX: else5: +// CHECK-32-EX-NEXT: br label [[IFCONT6]] +// CHECK-32-EX: ifcont6: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-EX-NEXT: br label [[PRECOND:%.*]] +// CHECK-32-EX: precond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 +// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK-32-EX: body: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32-EX: then: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK-32-EX-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 +// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] +// CHECK-32-EX: else: +// CHECK-32-EX-NEXT: br label [[IFCONT]] +// CHECK-32-EX: ifcont: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK-32-EX: then2: +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK-32-EX-NEXT: br label [[IFCONT4:%.*]] +// CHECK-32-EX: else3: +// CHECK-32-EX-NEXT: br label [[IFCONT4]] +// CHECK-32-EX: ifcont4: +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK-32-EX-NEXT: br label [[PRECOND]] +// CHECK-32-EX: exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 +// CHECK-32-EX-SAME: (i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP1]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP7]], i32 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK-32-EX-NEXT: [[C1:%.*]] = alloca i8, align 1 +// CHECK-32-EX-NEXT: [[D2:%.*]] = alloca float, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i8 0, i8* [[C1]], align 1 +// CHECK-32-EX-NEXT: store float 1.000000e+00, float* [[D2]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK-32-EX-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 +// CHECK-32-EX-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK-32-EX-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load float, float* [[D2]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 +// CHECK-32-EX-NEXT: store float [[MUL]], float* [[D2]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast float* [[D2]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 2, i32 8, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32-EX: .omp.reduction.then: +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK-32-EX-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK-32-EX-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK-32-EX-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] +// CHECK-32-EX-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 +// CHECK-32-EX-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load float, float* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load float, float* [[D2]], align 4 +// CHECK-32-EX-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK-32-EX: .omp.reduction.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 +// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 +// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 +// CHECK-32-EX-NEXT: store i8 [[TMP18]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-EX-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to float** +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP22]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[TMP23]], i32 1 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to i8* +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP23]] to i32* +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK-32-EX-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) +// CHECK-32-EX-NEXT: store i32 [[TMP32]], i32* [[TMP28]], align 4 +// CHECK-32-EX-NEXT: [[TMP33:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 +// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP28]], i32 1 +// CHECK-32-EX-NEXT: [[TMP35:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP35]], i8** [[TMP24]], align 4 +// CHECK-32-EX-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK-32-EX-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK-32-EX-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK-32-EX-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0 +// CHECK-32-EX-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]] +// CHECK-32-EX-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK-32-EX-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]] +// CHECK-32-EX-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]] +// CHECK-32-EX-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] +// CHECK-32-EX-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32-EX: then: +// CHECK-32-EX-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK-32-EX-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK-32-EX-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP48]], i8* [[TMP49]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] +// CHECK-32-EX: else: +// CHECK-32-EX-NEXT: br label [[IFCONT]] +// CHECK-32-EX: ifcont: +// CHECK-32-EX-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] +// CHECK-32-EX-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK-32-EX: then5: +// CHECK-32-EX-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 +// CHECK-32-EX-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP56:%.*]] = load i8*, i8** [[TMP55]], align 4 +// CHECK-32-EX-NEXT: [[TMP57:%.*]] = load i8, i8* [[TMP54]], align 1 +// CHECK-32-EX-NEXT: store i8 [[TMP57]], i8* [[TMP56]], align 1 +// CHECK-32-EX-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to float** +// CHECK-32-EX-NEXT: [[TMP60:%.*]] = load float*, float** [[TMP59]], align 4 +// CHECK-32-EX-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to float** +// CHECK-32-EX-NEXT: [[TMP63:%.*]] = load float*, float** [[TMP62]], align 4 +// CHECK-32-EX-NEXT: [[TMP64:%.*]] = load float, float* [[TMP60]], align 4 +// CHECK-32-EX-NEXT: store float [[TMP64]], float* [[TMP63]], align 4 +// CHECK-32-EX-NEXT: br label [[IFCONT7:%.*]] +// CHECK-32-EX: else6: +// CHECK-32-EX-NEXT: br label [[IFCONT7]] +// CHECK-32-EX: ifcont7: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 +// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32-EX: then: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast i32 addrspace(3)* [[TMP10]] to i8 addrspace(3)* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP9]], align 1 +// CHECK-32-EX-NEXT: store volatile i8 [[TMP12]], i8 addrspace(3)* [[TMP11]], align 1 +// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] +// CHECK-32-EX: else: +// CHECK-32-EX-NEXT: br label [[IFCONT]] +// CHECK-32-EX: ifcont: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK-32-EX: then2: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i32 addrspace(3)* [[TMP14]] to i8 addrspace(3)* +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP15]], align 1 +// CHECK-32-EX-NEXT: store i8 [[TMP18]], i8* [[TMP17]], align 1 +// CHECK-32-EX-NEXT: br label [[IFCONT4:%.*]] +// CHECK-32-EX: else3: +// CHECK-32-EX-NEXT: br label [[IFCONT4]] +// CHECK-32-EX: ifcont4: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK-32-EX: then6: +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i32* +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK-32-EX-NEXT: store volatile i32 [[TMP23]], i32 addrspace(3)* [[TMP22]], align 4 +// CHECK-32-EX-NEXT: br label [[IFCONT8:%.*]] +// CHECK-32-EX: else7: +// CHECK-32-EX-NEXT: br label [[IFCONT8]] +// CHECK-32-EX: ifcont8: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] +// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] +// CHECK-32-EX: then10: +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i8*, i8** [[TMP26]], align 4 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to i32* +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP25]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK-32-EX-NEXT: br label [[IFCONT12:%.*]] +// CHECK-32-EX: else11: +// CHECK-32-EX-NEXT: br label [[IFCONT12]] +// CHECK-32-EX: ifcont12: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35 +// CHECK-32-EX-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP1]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP8]], i32 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK-32-EX-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 +// CHECK-32-EX-NEXT: store i32 [[OR]], i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 +// CHECK-32-EX-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32-EX: .omp.reduction.then: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-32-EX-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK-32-EX-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] +// CHECK-32-EX-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK-32-EX: cond.true9: +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK-32-EX-NEXT: br label [[COND_END11:%.*]] +// CHECK-32-EX: cond.false10: +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK-32-EX-NEXT: br label [[COND_END11]] +// CHECK-32-EX: cond.end11: +// CHECK-32-EX-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK-32-EX-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK-32-EX: .omp.reduction.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7 +// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32** +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) +// CHECK-32-EX-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP21]], i8** [[TMP12]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i16** +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i16*, i16** [[TMP23]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = bitcast i16* [[TMP26]] to i8* +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK-32-EX-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) +// CHECK-32-EX-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 +// CHECK-32-EX-NEXT: store i16 [[TMP33]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK-32-EX-NEXT: [[TMP35:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK-32-EX-NEXT: [[TMP36:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP36]], i8** [[TMP25]], align 4 +// CHECK-32-EX-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK-32-EX-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +// CHECK-32-EX-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK-32-EX-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 +// CHECK-32-EX-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] +// CHECK-32-EX-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK-32-EX-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] +// CHECK-32-EX-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] +// CHECK-32-EX-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] +// CHECK-32-EX-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32-EX: then: +// CHECK-32-EX-NEXT: [[TMP49:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK-32-EX-NEXT: [[TMP50:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK-32-EX-NEXT: call void @"_omp$reduction$reduction_func6"(i8* [[TMP49]], i8* [[TMP50]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] +// CHECK-32-EX: else: +// CHECK-32-EX-NEXT: br label [[IFCONT]] +// CHECK-32-EX: ifcont: +// CHECK-32-EX-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] +// CHECK-32-EX-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK-32-EX: then5: +// CHECK-32-EX-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32** +// CHECK-32-EX-NEXT: [[TMP56:%.*]] = load i32*, i32** [[TMP55]], align 4 +// CHECK-32-EX-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32** +// CHECK-32-EX-NEXT: [[TMP59:%.*]] = load i32*, i32** [[TMP58]], align 4 +// CHECK-32-EX-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 +// CHECK-32-EX-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i16** +// CHECK-32-EX-NEXT: [[TMP63:%.*]] = load i16*, i16** [[TMP62]], align 4 +// CHECK-32-EX-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i16** +// CHECK-32-EX-NEXT: [[TMP66:%.*]] = load i16*, i16** [[TMP65]], align 4 +// CHECK-32-EX-NEXT: [[TMP67:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP67]], i16* [[TMP66]], align 2 +// CHECK-32-EX-NEXT: br label [[IFCONT7:%.*]] +// CHECK-32-EX: else6: +// CHECK-32-EX-NEXT: br label [[IFCONT7]] +// CHECK-32-EX: ifcont7: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8 +// CHECK-32-EX-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [2 x i8*]* +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-32-EX: then: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK-32-EX-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]] +// CHECK-32-EX: else: +// CHECK-32-EX-NEXT: br label [[IFCONT]] +// CHECK-32-EX: ifcont: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK-32-EX: then2: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK-32-EX-NEXT: br label [[IFCONT4:%.*]] +// CHECK-32-EX: else3: +// CHECK-32-EX-NEXT: br label [[IFCONT4]] +// CHECK-32-EX: ifcont4: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK-32-EX: then6: +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to i16* +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = bitcast i32 addrspace(3)* [[TMP22]] to i16 addrspace(3)* +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP21]], align 2 +// CHECK-32-EX-NEXT: store volatile i16 [[TMP24]], i16 addrspace(3)* [[TMP23]], align 2 +// CHECK-32-EX-NEXT: br label [[IFCONT8:%.*]] +// CHECK-32-EX: else7: +// CHECK-32-EX-NEXT: br label [[IFCONT8]] +// CHECK-32-EX: ifcont8: +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] +// CHECK-32-EX: then10: +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = bitcast i32 addrspace(3)* [[TMP26]] to i16 addrspace(3)* +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP7]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i8*, i8** [[TMP28]], align 4 +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to i16* +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP27]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 2 +// CHECK-32-EX-NEXT: br label [[IFCONT12:%.*]] +// CHECK-32-EX: else11: +// CHECK-32-EX-NEXT: br label [[IFCONT12]] +// CHECK-32-EX: ifcont12: +// CHECK-32-EX-NEXT: ret void +// diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp index 9e2b35a3b8b0b7..4dacc010267ef6 100644 --- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -1,25 +1,22 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK45-64 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32-EX // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-64 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32-EX // expected-no-diagnostics #ifndef HEADER #define HEADER // Check that the execution mode of all 2 target regions on the gpu is set to NonSPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l42}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l47}}_exec_mode = weak protected constant i8 2 #define N 1000 @@ -60,33 +57,1457 @@ int bar(int n){ return a; } -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l32}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-NOT: call void @__kmpc_for_static_init -// CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) -// CHECK: ret void - -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l37}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-NOT: call void @__kmpc_for_static_init -// CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) -// CHECK: ret void - -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l42}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-NOT: call void @__kmpc_for_static_init -// CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) -// CHECK: ret void - -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l47}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK-NOT: call void @__kmpc_for_static_init -// CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK-NOT: call void @__kmpc_nvptx_end_reduce_nowait( -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) -// CHECK: ret void - #endif +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 +// CHECK45-64-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK45-64-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK45-64: simd.if.then: +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-64: omp.inner.for.cond: +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK45-64-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-64: omp.inner.for.body: +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK45-64-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-64: omp.body.continue: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-64: omp.inner.for.inc: +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK45-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// CHECK45-64: omp.inner.for.end: +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK45-64-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK45-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 +// CHECK45-64-NEXT: br label [[SIMD_IF_END]] +// CHECK45-64: simd.if.end: +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK45-64-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK45-64-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK45-64: simd.if.then: +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-64: omp.inner.for.cond: +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK45-64-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-64: omp.inner.for.body: +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK45-64-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK45-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-64-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 +// CHECK45-64-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-64: omp.body.continue: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-64: omp.inner.for.inc: +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// CHECK45-64: omp.inner.for.end: +// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0 +// CHECK45-64-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK45-64-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK45-64-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK45-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 +// CHECK45-64-NEXT: br label [[SIMD_IF_END]] +// CHECK45-64: simd.if.end: +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK45-64-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-64: omp.inner.for.cond: +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-64: omp.inner.for.body: +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK45-64-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-64: omp.body.continue: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-64: omp.inner.for.inc: +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// CHECK45-64: omp.inner.for.end: +// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK45-64-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[N1:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: store i32 0, ptr [[N1]], align 4 +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-64: omp.inner.for.cond: +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 +// CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-64: omp.inner.for.body: +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-64-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-64: omp.body.continue: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-64: omp.inner.for.inc: +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// CHECK45-64: omp.inner.for.end: +// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 +// CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 +// CHECK45-32-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK45-32-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK45-32: simd.if.then: +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32: omp.inner.for.cond: +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK45-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32: omp.inner.for.body: +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK45-32-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP8]] +// CHECK45-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32: omp.body.continue: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32: omp.inner.for.inc: +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK45-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// CHECK45-32: omp.inner.for.end: +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK45-32-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK45-32-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 +// CHECK45-32-NEXT: br label [[SIMD_IF_END]] +// CHECK45-32: simd.if.end: +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK45-32-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK45-32-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK45-32: simd.if.then: +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32: omp.inner.for.cond: +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK45-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32: omp.inner.for.body: +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK45-32-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP8]] +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK45-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-32-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 +// CHECK45-32-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32: omp.body.continue: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32: omp.inner.for.inc: +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// CHECK45-32: omp.inner.for.end: +// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0 +// CHECK45-32-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK45-32-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK45-32-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 +// CHECK45-32-NEXT: br label [[SIMD_IF_END]] +// CHECK45-32: simd.if.end: +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK45-32-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32: omp.inner.for.cond: +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32: omp.inner.for.body: +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP4]] +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK45-32-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32: omp.body.continue: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32: omp.inner.for.inc: +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// CHECK45-32: omp.inner.for.end: +// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK45-32-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[N1:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[N1]], align 4 +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32: omp.inner.for.cond: +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 +// CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32: omp.inner.for.body: +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP5]] +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-32-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32: omp.body.continue: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32: omp.inner.for.inc: +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// CHECK45-32: omp.inner.for.end: +// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 +// CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 +// CHECK45-32-EX-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK45-32-EX: simd.if.then: +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32-EX: omp.inner.for.cond: +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32-EX: omp.inner.for.body: +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK45-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP8]] +// CHECK45-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32-EX: omp.body.continue: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32-EX: omp.inner.for.inc: +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: omp.inner.for.end: +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK45-32-EX-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK45-32-EX-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 +// CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]] +// CHECK45-32-EX: simd.if.end: +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK45-32-EX-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK45-32-EX: simd.if.then: +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32-EX: omp.inner.for.cond: +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32-EX: omp.inner.for.body: +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK45-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP8]] +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK45-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-32-EX-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 +// CHECK45-32-EX-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32-EX: omp.body.continue: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32-EX: omp.inner.for.inc: +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: omp.inner.for.end: +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0 +// CHECK45-32-EX-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK45-32-EX-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK45-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 +// CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]] +// CHECK45-32-EX: simd.if.end: +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK45-32-EX-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32-EX: omp.inner.for.cond: +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32-EX: omp.inner.for.body: +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP4]] +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32-EX: omp.body.continue: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32-EX: omp.inner.for.inc: +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: omp.inner.for.end: +// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK45-32-EX-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[N1:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[N1]], align 4 +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32-EX: omp.inner.for.cond: +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 +// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32-EX: omp.inner.for.body: +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP5]] +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32-EX: omp.body.continue: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32-EX: omp.inner.for.inc: +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: omp.inner.for.end: +// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 +// CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 +// CHECK-64-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK-64: simd.if.then: +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK-64-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK-64-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 +// CHECK-64-NEXT: br label [[SIMD_IF_END]] +// CHECK-64: simd.if.end: +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK-64-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK-64: simd.if.then: +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK-64-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-64-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 +// CHECK-64-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0 +// CHECK-64-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK-64-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK-64-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 +// CHECK-64-NEXT: br label [[SIMD_IF_END]] +// CHECK-64: simd.if.end: +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK-64-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK-64-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[N1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[N1]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 +// CHECK-32-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK-32: simd.if.then: +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP8]] +// CHECK-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK-32-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK-32-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 +// CHECK-32-NEXT: br label [[SIMD_IF_END]] +// CHECK-32: simd.if.end: +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK-32-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK-32: simd.if.then: +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP8]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-32-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 +// CHECK-32-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0 +// CHECK-32-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK-32-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK-32-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 +// CHECK-32-NEXT: br label [[SIMD_IF_END]] +// CHECK-32: simd.if.end: +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK-32-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP4]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK-32-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[N1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[N1]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP5]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 +// CHECK-32-EX-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK-32-EX: simd.if.then: +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP8]] +// CHECK-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK-32-EX-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK-32-EX-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 +// CHECK-32-EX-NEXT: br label [[SIMD_IF_END]] +// CHECK-32-EX: simd.if.end: +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK-32-EX-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] +// CHECK-32-EX: simd.if.then: +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]] +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-32-EX-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 +// CHECK-32-EX-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0 +// CHECK-32-EX-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK-32-EX-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 +// CHECK-32-EX-NEXT: br label [[SIMD_IF_END]] +// CHECK-32-EX: simd.if.end: +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK-32-EX-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP4]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK-32-EX-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[N1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[N1]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp index e7ba9671b97bd8..00ddea65f2ac68 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -1,25 +1,22 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK45-64 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32 +// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK45-32-EX // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-64 // RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32 +// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK-32-EX // expected-no-diagnostics #ifndef HEADER #define HEADER // Check that the execution mode of all 2 target regions on the gpu is set to NonSPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak protected constant i8 2 -// CHECK-DAG: {{@__omp_offloading_.+l53}}_exec_mode = weak protected constant i8 2 #define N 1000 #define M 10 @@ -69,38 +66,3481 @@ int bar(int n){ return a; } -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l37( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) - -// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: call void @__kmpc_distribute_static_fini( -// CHECK: ret void - -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) - -// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: call void @__kmpc_distribute_static_fini( -// CHECK: ret void - -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) - -// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: call void @__kmpc_distribute_static_fini( -// CHECK: ret void - -// CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32|64}} [[F_IN:%.+]]) -// CHECK: store {{.+}} [[F_IN]], ptr {{.+}}, -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) - -// CHECK: store {{.+}} 99, ptr [[COMB_UB:%.+]], align -// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, ptr [[COMB_UB]], -// CHECK: call void @__kmpc_distribute_static_fini( -// CHECK: ret void - #endif +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK45-64-SAME: (i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-64-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK45-64-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-64-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK45-64: omp.precond.then: +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-64: omp.dispatch.cond: +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-64: cond.true: +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-64-NEXT: br label [[COND_END:%.*]] +// CHECK45-64: cond.false: +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: br label [[COND_END]] +// CHECK45-64: cond.end: +// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-64: omp.dispatch.body: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-64: omp.inner.for.cond: +// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-64: omp.inner.for.body: +// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-64: omp.body.continue: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-64: omp.inner.for.inc: +// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK45-64: omp.inner.for.end: +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-64: omp.dispatch.inc: +// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-64: omp.dispatch.end: +// CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK45-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-64: .omp.final.then: +// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-64-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK45-64-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 +// CHECK45-64-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK45-64-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 +// CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-64: .omp.final.done: +// CHECK45-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK45-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK45-64: .omp.lastprivate.then: +// CHECK45-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK45-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK45-64: .omp.lastprivate.done: +// CHECK45-64-NEXT: br label [[OMP_PRECOND_END]] +// CHECK45-64: omp.precond.end: +// CHECK45-64-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i64 4) +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40 +// CHECK45-64-SAME: (i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-64-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK45-64: omp.precond.then: +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-64: omp.dispatch.cond: +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-64: cond.true: +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-64-NEXT: br label [[COND_END:%.*]] +// CHECK45-64: cond.false: +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: br label [[COND_END]] +// CHECK45-64: cond.end: +// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-64: omp.dispatch.body: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-64: omp.inner.for.cond: +// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-64: omp.inner.for.body: +// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-64-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 +// CHECK45-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-64: omp.body.continue: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-64: omp.inner.for.inc: +// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK45-64: omp.inner.for.end: +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-64: omp.dispatch.inc: +// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-64: omp.dispatch.end: +// CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK45-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-64: .omp.final.then: +// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-64-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 +// CHECK45-64-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 +// CHECK45-64-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] +// CHECK45-64-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4 +// CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-64: .omp.final.done: +// CHECK45-64-NEXT: br label [[OMP_PRECOND_END]] +// CHECK45-64: omp.precond.end: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45 +// CHECK45-64-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-64-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-64: omp.dispatch.cond: +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK45-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-64: cond.true: +// CHECK45-64-NEXT: br label [[COND_END:%.*]] +// CHECK45-64: cond.false: +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: br label [[COND_END]] +// CHECK45-64: cond.end: +// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-64: omp.dispatch.body: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-64: omp.inner.for.cond: +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-64: omp.inner.for.body: +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-64: omp.body.continue: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-64: omp.inner.for.inc: +// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK45-64: omp.inner.for.end: +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-64: omp.dispatch.inc: +// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK45-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-64: omp.dispatch.end: +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK45-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-64: .omp.final.then: +// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-64: .omp.final.done: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50 +// CHECK45-64-SAME: (ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-64: user_code.entry: +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-64-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: ret void +// CHECK45-64: worker.exit: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR1]] { +// CHECK45-64-NEXT: entry: +// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-64: omp.dispatch.cond: +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK45-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-64: cond.true: +// CHECK45-64-NEXT: br label [[COND_END:%.*]] +// CHECK45-64: cond.false: +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: br label [[COND_END]] +// CHECK45-64: cond.end: +// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-64-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-64: omp.dispatch.body: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-64: omp.inner.for.cond: +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-64-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-64: omp.inner.for.body: +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK45-64-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK45-64-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK45-64-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] +// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] +// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] +// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-64: omp.body.continue: +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-64: omp.inner.for.inc: +// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK45-64: omp.inner.for.end: +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-64: omp.dispatch.inc: +// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-64: omp.dispatch.end: +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK45-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK45-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-64: .omp.final.then: +// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-64-NEXT: store i32 10, ptr [[J]], align 4 +// CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-64: .omp.final.done: +// CHECK45-64-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK45-32-SAME: (i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-32-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-32-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-32-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK45-32: omp.precond.then: +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-32: omp.dispatch.cond: +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-32: cond.true: +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-NEXT: br label [[COND_END:%.*]] +// CHECK45-32: cond.false: +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: br label [[COND_END]] +// CHECK45-32: cond.end: +// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-32-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-32: omp.dispatch.body: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32: omp.inner.for.cond: +// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-32-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32: omp.inner.for.body: +// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK45-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32: omp.body.continue: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32: omp.inner.for.inc: +// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK45-32: omp.inner.for.end: +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-32: omp.dispatch.inc: +// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-32: omp.dispatch.end: +// CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK45-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32: .omp.final.then: +// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-32-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK45-32-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 +// CHECK45-32-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK45-32-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 +// CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-32: .omp.final.done: +// CHECK45-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK45-32-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK45-32: .omp.lastprivate.then: +// CHECK45-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK45-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK45-32: .omp.lastprivate.done: +// CHECK45-32-NEXT: br label [[OMP_PRECOND_END]] +// CHECK45-32: omp.precond.end: +// CHECK45-32-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4) +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40 +// CHECK45-32-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-32-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK45-32: omp.precond.then: +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-32: omp.dispatch.cond: +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-32: cond.true: +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-NEXT: br label [[COND_END:%.*]] +// CHECK45-32: cond.false: +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: br label [[COND_END]] +// CHECK45-32: cond.end: +// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-32: omp.dispatch.body: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32: omp.inner.for.cond: +// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32: omp.inner.for.body: +// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK45-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-32-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 +// CHECK45-32-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32: omp.body.continue: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32: omp.inner.for.inc: +// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK45-32: omp.inner.for.end: +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-32: omp.dispatch.inc: +// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-32: omp.dispatch.end: +// CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK45-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32: .omp.final.then: +// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-32-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 +// CHECK45-32-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 +// CHECK45-32-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] +// CHECK45-32-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4 +// CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-32: .omp.final.done: +// CHECK45-32-NEXT: br label [[OMP_PRECOND_END]] +// CHECK45-32: omp.precond.end: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45 +// CHECK45-32-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-32-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-32: omp.dispatch.cond: +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK45-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-32: cond.true: +// CHECK45-32-NEXT: br label [[COND_END:%.*]] +// CHECK45-32: cond.false: +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: br label [[COND_END]] +// CHECK45-32: cond.end: +// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-32: omp.dispatch.body: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32: omp.inner.for.cond: +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32: omp.inner.for.body: +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32: omp.body.continue: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32: omp.inner.for.inc: +// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK45-32: omp.inner.for.end: +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-32: omp.dispatch.inc: +// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK45-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-32: omp.dispatch.end: +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK45-32-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32: .omp.final.then: +// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-32: .omp.final.done: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50 +// CHECK45-32-SAME: (ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32: user_code.entry: +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-32-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: ret void +// CHECK45-32: worker.exit: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] { +// CHECK45-32-NEXT: entry: +// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-32: omp.dispatch.cond: +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK45-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-32: cond.true: +// CHECK45-32-NEXT: br label [[COND_END:%.*]] +// CHECK45-32: cond.false: +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: br label [[COND_END]] +// CHECK45-32: cond.end: +// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-32-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-32: omp.dispatch.body: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32: omp.inner.for.cond: +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32: omp.inner.for.body: +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK45-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK45-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK45-32-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] +// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] +// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] +// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]] +// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32: omp.body.continue: +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32: omp.inner.for.inc: +// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK45-32: omp.inner.for.end: +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-32: omp.dispatch.inc: +// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-32: omp.dispatch.end: +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK45-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK45-32-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32: .omp.final.then: +// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-32-NEXT: store i32 10, ptr [[J]], align 4 +// CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-32: .omp.final.done: +// CHECK45-32-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK45-32-EX-SAME: (i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-32-EX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-32-EX-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-32-EX-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK45-32-EX: omp.precond.then: +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-32-EX: omp.dispatch.cond: +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-32-EX: cond.true: +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK45-32-EX: cond.false: +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: br label [[COND_END]] +// CHECK45-32-EX: cond.end: +// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-32-EX: omp.dispatch.body: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32-EX: omp.inner.for.cond: +// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-32-EX-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32-EX: omp.inner.for.body: +// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK45-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32-EX: omp.body.continue: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32-EX: omp.inner.for.inc: +// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK45-32-EX: omp.inner.for.end: +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-32-EX: omp.dispatch.inc: +// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-32-EX: omp.dispatch.end: +// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-EX: .omp.final.then: +// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-32-EX-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK45-32-EX-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 +// CHECK45-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK45-32-EX-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 +// CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-32-EX: .omp.final.done: +// CHECK45-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK45-32-EX: .omp.lastprivate.then: +// CHECK45-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK45-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK45-32-EX: .omp.lastprivate.done: +// CHECK45-32-EX-NEXT: br label [[OMP_PRECOND_END]] +// CHECK45-32-EX: omp.precond.end: +// CHECK45-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4) +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40 +// CHECK45-32-EX-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-32-EX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK45-32-EX: omp.precond.then: +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-32-EX: omp.dispatch.cond: +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-32-EX: cond.true: +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK45-32-EX: cond.false: +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: br label [[COND_END]] +// CHECK45-32-EX: cond.end: +// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-32-EX: omp.dispatch.body: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32-EX: omp.inner.for.cond: +// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32-EX: omp.inner.for.body: +// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK45-32-EX-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 +// CHECK45-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32-EX: omp.body.continue: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32-EX: omp.inner.for.inc: +// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK45-32-EX: omp.inner.for.end: +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-32-EX: omp.dispatch.inc: +// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-32-EX: omp.dispatch.end: +// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-EX: .omp.final.then: +// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-32-EX-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 +// CHECK45-32-EX-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 +// CHECK45-32-EX-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] +// CHECK45-32-EX-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4 +// CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-32-EX: .omp.final.done: +// CHECK45-32-EX-NEXT: br label [[OMP_PRECOND_END]] +// CHECK45-32-EX: omp.precond.end: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45 +// CHECK45-32-EX-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-32-EX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-32-EX: omp.dispatch.cond: +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-32-EX: cond.true: +// CHECK45-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK45-32-EX: cond.false: +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: br label [[COND_END]] +// CHECK45-32-EX: cond.end: +// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-32-EX: omp.dispatch.body: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32-EX: omp.inner.for.cond: +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32-EX: omp.inner.for.body: +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32-EX: omp.body.continue: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32-EX: omp.inner.for.inc: +// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK45-32-EX: omp.inner.for.end: +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-32-EX: omp.dispatch.inc: +// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK45-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-32-EX: omp.dispatch.end: +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-EX: .omp.final.then: +// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-32-EX: .omp.final.done: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50 +// CHECK45-32-EX-SAME: (ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK45-32-EX: user_code.entry: +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK45-32-EX-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: ret void +// CHECK45-32-EX: worker.exit: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-NEXT: entry: +// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK45-32-EX: omp.dispatch.cond: +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK45-32-EX: cond.true: +// CHECK45-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK45-32-EX: cond.false: +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: br label [[COND_END]] +// CHECK45-32-EX: cond.end: +// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK45-32-EX: omp.dispatch.body: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK45-32-EX: omp.inner.for.cond: +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK45-32-EX: omp.inner.for.body: +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK45-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK45-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK45-32-EX-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] +// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] +// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] +// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]] +// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK45-32-EX: omp.body.continue: +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK45-32-EX: omp.inner.for.inc: +// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK45-32-EX: omp.inner.for.end: +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK45-32-EX: omp.dispatch.inc: +// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK45-32-EX: omp.dispatch.end: +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-EX: .omp.final.then: +// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK45-32-EX-NEXT: store i32 10, ptr [[J]], align 4 +// CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK45-32-EX: .omp.final.done: +// CHECK45-32-EX-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK-64-SAME: (i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-64-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-64: omp.precond.then: +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-64-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK-64-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 +// CHECK-64-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK-64-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-64: .omp.lastprivate.then: +// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK-64: .omp.lastprivate.done: +// CHECK-64-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-64: omp.precond.end: +// CHECK-64-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i64 4) +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40 +// CHECK-64-SAME: (i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-64: omp.precond.then: +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-64-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 +// CHECK-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-64-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 +// CHECK-64-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 +// CHECK-64-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] +// CHECK-64-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-64: omp.precond.end: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45 +// CHECK-64-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50 +// CHECK-64-SAME: (ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-64: user_code.entry: +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-64-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: ret void +// CHECK-64: worker.exit: +// CHECK-64-NEXT: ret void +// +// +// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-64: omp.dispatch.cond: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-64: omp.dispatch.body: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK-64-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK-64-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK-64-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] +// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-64: omp.dispatch.inc: +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-64: omp.dispatch.end: +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64: .omp.final.then: +// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-64-NEXT: store i32 10, ptr [[J]], align 4 +// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-64: .omp.final.done: +// CHECK-64-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK-32-SAME: (i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-32: omp.precond.then: +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-32-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK-32-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 +// CHECK-32-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK-32-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK-32-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32: .omp.lastprivate.then: +// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK-32: .omp.lastprivate.done: +// CHECK-32-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-32: omp.precond.end: +// CHECK-32-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4) +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40 +// CHECK-32-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-32: omp.precond.then: +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-32-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 +// CHECK-32-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-32-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 +// CHECK-32-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 +// CHECK-32-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] +// CHECK-32-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-32: omp.precond.end: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45 +// CHECK-32-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-32-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50 +// CHECK-32-SAME: (ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32: user_code.entry: +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: ret void +// CHECK-32: worker.exit: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32: omp.dispatch.cond: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32: omp.dispatch.body: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK-32-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]] +// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32: omp.dispatch.inc: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32: omp.dispatch.end: +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-32-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32: .omp.final.then: +// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-32-NEXT: store i32 10, ptr [[J]], align 4 +// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32: .omp.final.done: +// CHECK-32-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK-32-EX-SAME: (i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-EX-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-EX-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-32-EX: omp.precond.then: +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-32-EX-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK-32-EX-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 +// CHECK-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK-32-EX-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32-EX: .omp.lastprivate.then: +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK-32-EX: .omp.lastprivate.done: +// CHECK-32-EX-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-32-EX: omp.precond.end: +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[L1]], i32 4) +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40 +// CHECK-32-EX-SAME: (i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-32-EX: omp.precond.then: +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK-32-EX-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 +// CHECK-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-32-EX-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 +// CHECK-32-EX-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 +// CHECK-32-EX-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] +// CHECK-32-EX-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-32-EX: omp.precond.end: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45 +// CHECK-32-EX-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50 +// CHECK-32-EX-SAME: (ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-32-EX: user_code.entry: +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK-32-EX-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: ret void +// CHECK-32-EX: worker.exit: +// CHECK-32-EX-NEXT: ret void +// +// +// CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK-32-EX: omp.dispatch.cond: +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32-EX: cond.true: +// CHECK-32-EX-NEXT: br label [[COND_END:%.*]] +// CHECK-32-EX: cond.false: +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[COND_END]] +// CHECK-32-EX: cond.end: +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK-32-EX: omp.dispatch.body: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX: omp.inner.for.cond: +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX: omp.inner.for.body: +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK-32-EX-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32-EX: omp.body.continue: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32-EX: omp.inner.for.inc: +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end: +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK-32-EX: omp.dispatch.inc: +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK-32-EX: omp.dispatch.end: +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX: .omp.final.then: +// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 10, ptr [[J]], align 4 +// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK-32-EX: .omp.final.done: +// CHECK-32-EX-NEXT: ret void +//