3,828 changes: 6 additions & 3,822 deletions clang/test/OpenMP/cancel_codegen.cpp

Large diffs are not rendered by default.

632 changes: 1 addition & 631 deletions clang/test/OpenMP/cancellation_point_codegen.cpp

Large diffs are not rendered by default.

6,026 changes: 8 additions & 6,018 deletions clang/test/OpenMP/distribute_codegen.cpp

Large diffs are not rendered by default.

1,664 changes: 4 additions & 1,660 deletions clang/test/OpenMP/distribute_firstprivate_codegen.cpp

Large diffs are not rendered by default.

1,732 changes: 4 additions & 1,728 deletions clang/test/OpenMP/distribute_lastprivate_codegen.cpp

Large diffs are not rendered by default.

12,235 changes: 4 additions & 12,231 deletions clang/test/OpenMP/distribute_parallel_for_codegen.cpp

Large diffs are not rendered by default.

2,380 changes: 4 additions & 2,376 deletions clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp

Large diffs are not rendered by default.

9,093 changes: 7 additions & 9,086 deletions clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp

Large diffs are not rendered by default.

2,542 changes: 4 additions & 2,538 deletions clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp

Large diffs are not rendered by default.

4,436 changes: 4 additions & 4,432 deletions clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp

Large diffs are not rendered by default.

1,714 changes: 4 additions & 1,710 deletions clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp

Large diffs are not rendered by default.

465 changes: 1 addition & 464 deletions clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp

Large diffs are not rendered by default.

621 changes: 1 addition & 620 deletions clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp

Large diffs are not rendered by default.

15,247 changes: 8 additions & 15,239 deletions clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp

Large diffs are not rendered by default.

3,106 changes: 7 additions & 3,099 deletions clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp

Large diffs are not rendered by default.

7,282 changes: 8 additions & 7,274 deletions clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp

Large diffs are not rendered by default.

3,520 changes: 8 additions & 3,512 deletions clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp

Large diffs are not rendered by default.

6,818 changes: 10 additions & 6,808 deletions clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp

Large diffs are not rendered by default.

2,540 changes: 8 additions & 2,532 deletions clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp

Large diffs are not rendered by default.

614 changes: 2 additions & 612 deletions clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp

Large diffs are not rendered by default.

1,308 changes: 4 additions & 1,304 deletions clang/test/OpenMP/distribute_private_codegen.cpp

Large diffs are not rendered by default.

10,094 changes: 20 additions & 10,074 deletions clang/test/OpenMP/distribute_simd_codegen.cpp

Large diffs are not rendered by default.

2,368 changes: 8 additions & 2,360 deletions clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp

Large diffs are not rendered by default.

2,668 changes: 8 additions & 2,660 deletions clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp

Large diffs are not rendered by default.

2,212 changes: 8 additions & 2,204 deletions clang/test/OpenMP/distribute_simd_private_codegen.cpp

Large diffs are not rendered by default.

851 changes: 6 additions & 845 deletions clang/test/OpenMP/distribute_simd_reduction_codegen.cpp

Large diffs are not rendered by default.

595 changes: 1 addition & 594 deletions clang/test/OpenMP/for_firstprivate_codegen.cpp

Large diffs are not rendered by default.

3,762 changes: 4 additions & 3,758 deletions clang/test/OpenMP/for_lastprivate_codegen.cpp

Large diffs are not rendered by default.

655 changes: 1 addition & 654 deletions clang/test/OpenMP/for_linear_codegen.cpp

Large diffs are not rendered by default.

491 changes: 1 addition & 490 deletions clang/test/OpenMP/for_private_codegen.cpp

Large diffs are not rendered by default.

3,917 changes: 1 addition & 3,916 deletions clang/test/OpenMP/for_reduction_codegen.cpp

Large diffs are not rendered by default.

3,633 changes: 2 additions & 3,631 deletions clang/test/OpenMP/for_reduction_codegen_UDR.cpp

Large diffs are not rendered by default.

530 changes: 1 addition & 529 deletions clang/test/OpenMP/for_reduction_task_codegen.cpp

Large diffs are not rendered by default.

679 changes: 1 addition & 678 deletions clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp

Large diffs are not rendered by default.

810 changes: 2 additions & 808 deletions clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp

Large diffs are not rendered by default.

345 changes: 11 additions & 334 deletions clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp

Large diffs are not rendered by default.

366 changes: 1 addition & 365 deletions clang/test/OpenMP/nvptx_lambda_capturing.cpp

Large diffs are not rendered by default.

80 changes: 1 addition & 79 deletions clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2

// expected-no-diagnostics
#ifndef HEADER
Expand Down Expand Up @@ -180,81 +180,3 @@ int main() {
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21
// CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP2]], i32 0)
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: call void @_Z3usev() #[[ATTR8:[0-9]+]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z3usev
// CHECK3-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
// CHECK3-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i32 0)
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK3-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: call void @_Z3usev() #[[ATTR8]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: call void @_Z4workv() #[[ATTR8]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
// CHECK3-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: ret void
//
133 changes: 1 addition & 132 deletions clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2

// expected-no-diagnostics
#ifndef HEADER
Expand Down Expand Up @@ -294,134 +294,3 @@ int main() {
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25
// CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7:[0-9]+]]
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK3-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP5]], i32 1)
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z3usePi
// CHECK3-SAME: (i32* noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8*
// CHECK3-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i32 1)
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
// CHECK3-NEXT: call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
// CHECK3-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4:[0-9]+]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4
// CHECK3-NEXT: call void @_Z4workPi(i32* noundef [[TMP1]]) #[[ATTR7]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z4workPi
// CHECK3-SAME: (i32* noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
// CHECK3-NEXT: call void @__atomic_load(i32 noundef 4, i8* noundef [[TMP1]], i8* noundef [[TMP2]], i32 noundef 0) #[[ATTR7]]
// CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK3: atomic_cont:
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK3-NEXT: store i32 [[ADD]], i32* [[ATOMIC_TEMP1]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8*
// CHECK3-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, i8* noundef [[TMP4]], i8* noundef [[TMP5]], i8* noundef [[TMP6]], i32 noundef 0, i32 noundef 0) #[[ATTR7]]
// CHECK3-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK3: atomic_exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
// CHECK3-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32***
// CHECK3-NEXT: [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 4
// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
397 changes: 7 additions & 390 deletions clang/test/OpenMP/nvptx_target_codegen.cpp

Large diffs are not rendered by default.

436 changes: 4 additions & 432 deletions clang/test/OpenMP/nvptx_target_parallel_codegen.cpp

Large diffs are not rendered by default.

449 changes: 4 additions & 445 deletions clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp

Large diffs are not rendered by default.

2,472 changes: 2 additions & 2,470 deletions clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp

Large diffs are not rendered by default.

167 changes: 1 addition & 166 deletions clang/test/OpenMP/nvptx_target_teams_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
Expand Down Expand Up @@ -378,168 +378,3 @@ int bar(int n){
// CHECK2-NEXT: store i16 1, i16* [[TMP0]], align 2
// CHECK2-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23
// CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8*
// CHECK3-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8*
// CHECK3-NEXT: store i8 49, i8* [[CONV]], align 1
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28
// CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: store i16 1, i16* [[CONV]], align 2
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33
// CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP1:%.*]] = bitcast i16* [[CONV]] to i8*
// CHECK3-NEXT: store i8* [[TMP1]], i8** [[TMP0]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP4]], i32 1)
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8*
// CHECK3-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP5]], i32 1)
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4
// CHECK3-NEXT: store i16 1, i16* [[TMP0]], align 2
// CHECK3-NEXT: ret void
//
124 changes: 1 addition & 123 deletions clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
Expand Down Expand Up @@ -275,125 +275,3 @@ int bar(int n){
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR5]]
// CHECK2-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16
// CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: [[I:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4)
// CHECK3-NEXT: [[I_ON_STACK:%.*]] = bitcast i8* [[I]] to i32*
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], i32* [[I_ON_STACK]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i32* [[I_ON_STACK]] to i8*
// CHECK3-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4
// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP10]], i32 1)
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[I]], i32 4)
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
// CHECK3-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR5]]
// CHECK3-NEXT: ret void
//
1,536 changes: 1 addition & 1,535 deletions clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1,040 changes: 1 addition & 1,039 deletions clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp

Large diffs are not rendered by default.

2,852 changes: 5 additions & 2,847 deletions clang/test/OpenMP/ordered_codegen.cpp

Large diffs are not rendered by default.

1,015 changes: 2 additions & 1,013 deletions clang/test/OpenMP/parallel_copyin_codegen.cpp

Large diffs are not rendered by default.

1,390 changes: 2 additions & 1,388 deletions clang/test/OpenMP/parallel_firstprivate_codegen.cpp

Large diffs are not rendered by default.

2,663 changes: 3 additions & 2,660 deletions clang/test/OpenMP/parallel_for_codegen.cpp

Large diffs are not rendered by default.

250 changes: 1 addition & 249 deletions clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp

Large diffs are not rendered by default.

322 changes: 1 addition & 321 deletions clang/test/OpenMP/parallel_for_linear_codegen.cpp

Large diffs are not rendered by default.

526 changes: 1 addition & 525 deletions clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp

Large diffs are not rendered by default.

249 changes: 3 additions & 246 deletions clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp

Large diffs are not rendered by default.

688 changes: 4 additions & 684 deletions clang/test/OpenMP/parallel_if_codegen.cpp

Large diffs are not rendered by default.

354 changes: 7 additions & 347 deletions clang/test/OpenMP/parallel_master_codegen.cpp

Large diffs are not rendered by default.

481 changes: 1 addition & 480 deletions clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp

Large diffs are not rendered by default.

928 changes: 1 addition & 927 deletions clang/test/OpenMP/parallel_master_taskloop_codegen.cpp

Large diffs are not rendered by default.

836 changes: 1 addition & 835 deletions clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp

Large diffs are not rendered by default.

902 changes: 1 addition & 901 deletions clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp

Large diffs are not rendered by default.

1,236 changes: 2 additions & 1,234 deletions clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp

Large diffs are not rendered by default.

415 changes: 1 addition & 414 deletions clang/test/OpenMP/parallel_private_codegen.cpp

Large diffs are not rendered by default.

1,382 changes: 1 addition & 1,381 deletions clang/test/OpenMP/parallel_reduction_codegen.cpp

Large diffs are not rendered by default.

472 changes: 1 addition & 471 deletions clang/test/OpenMP/parallel_reduction_task_codegen.cpp

Large diffs are not rendered by default.

164 changes: 1 addition & 163 deletions clang/test/OpenMP/parallel_sections_codegen.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1

// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s
Expand Down Expand Up @@ -200,165 +200,3 @@ int main() {
// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP11]]) #[[ATTR7]]
// CHECK1-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3foov
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void @_Z8mayThrowv()
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3barv
// CHECK2-SAME: () #[[ATTR0]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void @_Z8mayThrowv()
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@main
// CHECK2-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
// CHECK2-NEXT: ret i32 [[CALL]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
// CHECK2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK2-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK2-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.sections.case:
// CHECK2-NEXT: invoke void @_Z3foov()
// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK2: .omp.sections.case1:
// CHECK2-NEXT: invoke void @_Z3barv()
// CHECK2-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]]
// CHECK2: invoke.cont2:
// CHECK2-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK2: .omp.sections.exit:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK2-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: ret void
// CHECK2: terminate.lpad:
// CHECK2-NEXT: [[TMP10:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: catch i8* null
// CHECK2-NEXT: [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 0
// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP11]]) #[[ATTR7:[0-9]+]]
// CHECK2-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate
// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] comdat {
// CHECK2-NEXT: [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR5:[0-9]+]]
// CHECK2-NEXT: call void @_ZSt9terminatev() #[[ATTR7]]
// CHECK2-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
// CHECK2-SAME: () #[[ATTR6:[0-9]+]] comdat {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
// CHECK2-NEXT: ret i32 0
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
// CHECK2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
// CHECK2-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
// CHECK2-NEXT: ]
// CHECK2: .omp.sections.case:
// CHECK2-NEXT: invoke void @_Z3foov()
// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
// CHECK2: .omp.sections.exit:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK2-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: ret void
// CHECK2: terminate.lpad:
// CHECK2-NEXT: [[TMP10:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: catch i8* null
// CHECK2-NEXT: [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 0
// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP11]]) #[[ATTR7]]
// CHECK2-NEXT: unreachable
//
514 changes: 1 addition & 513 deletions clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp

Large diffs are not rendered by default.

553 changes: 1 addition & 552 deletions clang/test/OpenMP/sections_firstprivate_codegen.cpp

Large diffs are not rendered by default.

1,332 changes: 4 additions & 1,328 deletions clang/test/OpenMP/sections_lastprivate_codegen.cpp

Large diffs are not rendered by default.

391 changes: 1 addition & 390 deletions clang/test/OpenMP/sections_private_codegen.cpp

Large diffs are not rendered by default.

814 changes: 1 addition & 813 deletions clang/test/OpenMP/sections_reduction_codegen.cpp

Large diffs are not rendered by default.

518 changes: 1 addition & 517 deletions clang/test/OpenMP/sections_reduction_task_codegen.cpp

Large diffs are not rendered by default.

976 changes: 1 addition & 975 deletions clang/test/OpenMP/single_codegen.cpp

Large diffs are not rendered by default.

484 changes: 1 addition & 483 deletions clang/test/OpenMP/single_firstprivate_codegen.cpp

Large diffs are not rendered by default.

318 changes: 1 addition & 317 deletions clang/test/OpenMP/single_private_codegen.cpp

Large diffs are not rendered by default.

1,870 changes: 2 additions & 1,868 deletions clang/test/OpenMP/target_codegen_global_capture.cpp

Large diffs are not rendered by default.

840 changes: 10 additions & 830 deletions clang/test/OpenMP/target_map_codegen_03.cpp

Large diffs are not rendered by default.

8,463 changes: 12 additions & 8,451 deletions clang/test/OpenMP/target_parallel_codegen.cpp

Large diffs are not rendered by default.

11,156 changes: 10 additions & 11,146 deletions clang/test/OpenMP/target_parallel_for_codegen.cpp

Large diffs are not rendered by default.

539 changes: 1 addition & 538 deletions clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp

Large diffs are not rendered by default.

15,870 changes: 20 additions & 15,850 deletions clang/test/OpenMP/target_parallel_for_simd_codegen.cpp

Large diffs are not rendered by default.

4,647 changes: 12 additions & 4,635 deletions clang/test/OpenMP/target_parallel_if_codegen.cpp

Large diffs are not rendered by default.

3,825 changes: 12 additions & 3,813 deletions clang/test/OpenMP/target_parallel_num_threads_codegen.cpp

Large diffs are not rendered by default.

485 changes: 1 addition & 484 deletions clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp

Large diffs are not rendered by default.

11,250 changes: 12 additions & 11,238 deletions clang/test/OpenMP/target_teams_codegen.cpp

Large diffs are not rendered by default.

15,129 changes: 12 additions & 15,117 deletions clang/test/OpenMP/target_teams_distribute_codegen.cpp

Large diffs are not rendered by default.

1,157 changes: 4 additions & 1,153 deletions clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp

Large diffs are not rendered by default.

2,553 changes: 4 additions & 2,549 deletions clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp

Large diffs are not rendered by default.

1,827 changes: 3 additions & 1,824 deletions clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp

Large diffs are not rendered by default.

1,749 changes: 4 additions & 1,745 deletions clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp

Large diffs are not rendered by default.

2,134 changes: 4 additions & 2,130 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp

Large diffs are not rendered by default.

1,709 changes: 4 additions & 1,705 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

3,909 changes: 5 additions & 3,904 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp

Large diffs are not rendered by default.

9,282 changes: 7 additions & 9,275 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp

Large diffs are not rendered by default.

2,609 changes: 4 additions & 2,605 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1

// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
Expand Down Expand Up @@ -179,162 +179,3 @@ void gtid_test() {
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z9gtid_testv
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 100)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
// CHECK2-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK2-NEXT: br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2: omp_offload.failed:
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16() #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
// CHECK2: omp_offload.cont:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16
// CHECK2-SAME: () #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK2-SAME: () #[[ATTR3:[0-9]+]] section ".text.startup" {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK2-NEXT: ret void
//
2,703 changes: 5 additions & 2,698 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1,391 changes: 3 additions & 1,388 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

13,434 changes: 8 additions & 13,426 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp

Large diffs are not rendered by default.

3,357 changes: 10 additions & 3,347 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

7,566 changes: 8 additions & 7,558 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

5,395 changes: 13 additions & 5,382 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

15,872 changes: 12 additions & 15,860 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp

Large diffs are not rendered by default.

1,204 changes: 3 additions & 1,201 deletions clang/test/OpenMP/target_teams_distribute_private_codegen.cpp

Large diffs are not rendered by default.

799 changes: 3 additions & 796 deletions clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp

Large diffs are not rendered by default.

16,988 changes: 20 additions & 16,968 deletions clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp

Large diffs are not rendered by default.

1,773 changes: 8 additions & 1,765 deletions clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp

Large diffs are not rendered by default.

3,583 changes: 8 additions & 3,575 deletions clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp

Large diffs are not rendered by default.

2,648 changes: 6 additions & 2,642 deletions clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp

Large diffs are not rendered by default.

2,665 changes: 8 additions & 2,657 deletions clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp

Large diffs are not rendered by default.

2,139 changes: 6 additions & 2,133 deletions clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp

Large diffs are not rendered by default.

1,050 changes: 6 additions & 1,044 deletions clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp

Large diffs are not rendered by default.

3,047 changes: 4 additions & 3,043 deletions clang/test/OpenMP/target_teams_map_codegen.cpp

Large diffs are not rendered by default.

3,825 changes: 12 additions & 3,813 deletions clang/test/OpenMP/target_teams_num_teams_codegen.cpp

Large diffs are not rendered by default.

3,972 changes: 12 additions & 3,960 deletions clang/test/OpenMP/target_teams_thread_limit_codegen.cpp

Large diffs are not rendered by default.

1,857 changes: 3 additions & 1,854 deletions clang/test/OpenMP/task_if_codegen.cpp

Large diffs are not rendered by default.

673 changes: 1 addition & 672 deletions clang/test/OpenMP/task_in_reduction_codegen.cpp

Large diffs are not rendered by default.

151 changes: 2 additions & 149 deletions clang/test/OpenMP/task_member_call_codegen.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3

// RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
Expand Down Expand Up @@ -97,79 +97,6 @@ void c() {
// CHECK1-NEXT: ret i32 0
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z1cv
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[D:%.*]] = alloca [[CLASS_A:%.*]], align 1
// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.kmp_task_t_with_privates*
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP2]], i32 0, i32 1
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP1]])
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_task_privates_map.
// CHECK2-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], %class.a** noalias noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %class.a**, align 8
// CHECK2-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8
// CHECK2-NEXT: store %class.a** [[TMP1]], %class.a*** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP2]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = load %class.a**, %class.a*** [[DOTADDR1]], align 8
// CHECK2-NEXT: store %class.a* [[TMP3]], %class.a** [[TMP4]], align 8
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK2-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK2-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK2-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca %class.a*, align 8
// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK2-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1
// CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8*
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12
// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %class.a**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12
// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12
// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12
// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
// CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, %class.a**)*
// CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], %class.a** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: [[TMP16:%.*]] = load %class.a*, %class.a** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
// CHECK2-NEXT: call void @_ZN1a1bEv(%class.a* noundef nonnull align 1 dereferenceable(1) [[TMP16]]) #[[ATTR4]]
// CHECK2-NEXT: ret i32 0
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z1cv
// CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
Expand Down Expand Up @@ -243,77 +170,3 @@ void c() {
// CHECK3-NEXT: call void @_ZN1a1bEv(%class.a* noundef nonnull align 1 dereferenceable(1) [[TMP16]]) #[[ATTR4]]
// CHECK3-NEXT: ret i32 0
//
//
// CHECK4-LABEL: define {{[^@]+}}@_Z1cv
// CHECK4-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[D:%.*]] = alloca [[CLASS_A:%.*]], align 1
// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
// CHECK4-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
// CHECK4-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to %struct.kmp_task_t_with_privates*
// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP1]], i32 0, i32 0
// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP1]], i32 0, i32 1
// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0
// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i8* [[TMP0]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_task_privates_map.
// CHECK4-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], %class.a** noalias noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca %class.a**, align 8
// CHECK4-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8
// CHECK4-NEXT: store %class.a** [[TMP1]], %class.a*** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP2]], i32 0, i32 0
// CHECK4-NEXT: [[TMP4:%.*]] = load %class.a**, %class.a*** [[DOTADDR1]], align 8
// CHECK4-NEXT: store %class.a* [[TMP3]], %class.a** [[TMP4]], align 8
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_task_entry.
// CHECK4-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
// CHECK4-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
// CHECK4-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK4-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
// CHECK4-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca %class.a*, align 8
// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
// CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
// CHECK4-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
// CHECK4-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1
// CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8*
// CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12
// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %class.a**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12
// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12
// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12
// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12
// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
// CHECK4-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, %class.a**)*
// CHECK4-NEXT: call void [[TMP15]](i8* [[TMP14]], %class.a** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4:[0-9]+]]
// CHECK4-NEXT: [[TMP16:%.*]] = load %class.a*, %class.a** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
// CHECK4-NEXT: call void @_ZN1a1bEv(%class.a* noundef nonnull align 1 dereferenceable(1) [[TMP16]]) #[[ATTR4]]
// CHECK4-NEXT: ret i32 0
//
74 changes: 2 additions & 72 deletions clang/test/OpenMP/taskgroup_codegen.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=DEBUG1

// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
Expand Down Expand Up @@ -99,76 +99,6 @@ void parallel_taskgroup() {
// CHECK1-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3foov
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void @_Z8mayThrowv()
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@main
// CHECK2-SAME: () #[[ATTR2:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[A:%.*]] = alloca i8, align 1
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4
// CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: store i8 2, i8* [[A]], align 1
// CHECK2-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: invoke void @_Z3foov()
// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[A]], align 1
// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// CHECK2-NEXT: ret i32 [[CONV]]
// CHECK2: terminate.lpad:
// CHECK2-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: catch i8* null
// CHECK2-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP3]]) #[[ATTR8:[0-9]+]]
// CHECK2-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate
// CHECK2-SAME: (i8* [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] comdat {
// CHECK2-NEXT: [[TMP2:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP0]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @_ZSt9terminatev() #[[ATTR8]]
// CHECK2-NEXT: unreachable
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv
// CHECK2-SAME: () #[[ATTR6:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: invoke void @_Z3foov()
// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
// CHECK2: invoke.cont:
// CHECK2-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
// CHECK2-NEXT: ret void
// CHECK2: terminate.lpad:
// CHECK2-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 }
// CHECK2-NEXT: catch i8* null
// CHECK2-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP3]]) #[[ATTR8]]
// CHECK2-NEXT: unreachable
//
//
// DEBUG1-LABEL: define {{[^@]+}}@_Z3foov
// DEBUG1-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
// DEBUG1-NEXT: entry:
Expand Down Expand Up @@ -217,7 +147,7 @@ void parallel_taskgroup() {
//
//
// DEBUG1-LABEL: define {{[^@]+}}@.omp_outlined.
// DEBUG1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG24:![0-9]+]] {
// DEBUG1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG23:![0-9]+]] {
// DEBUG1-NEXT: entry:
// DEBUG1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// DEBUG1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
Expand Down
672 changes: 1 addition & 671 deletions clang/test/OpenMP/taskloop_in_reduction_codegen.cpp

Large diffs are not rendered by default.

803 changes: 2 additions & 801 deletions clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp

Large diffs are not rendered by default.

1,956 changes: 12 additions & 1,944 deletions clang/test/OpenMP/teams_codegen.cpp

Large diffs are not rendered by default.

2,094 changes: 8 additions & 2,086 deletions clang/test/OpenMP/teams_distribute_codegen.cpp

Large diffs are not rendered by default.

1,141 changes: 4 additions & 1,137 deletions clang/test/OpenMP/teams_distribute_collapse_codegen.cpp

Large diffs are not rendered by default.

2,499 changes: 4 additions & 2,495 deletions clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp

Large diffs are not rendered by default.

1,831 changes: 3 additions & 1,828 deletions clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp

Large diffs are not rendered by default.

1,714 changes: 4 additions & 1,710 deletions clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp

Large diffs are not rendered by default.

3,184 changes: 8 additions & 3,176 deletions clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp

Large diffs are not rendered by default.

1,677 changes: 4 additions & 1,673 deletions clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp

Large diffs are not rendered by default.

1,023 changes: 3 additions & 1,020 deletions clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp

Large diffs are not rendered by default.

4,089 changes: 4 additions & 4,085 deletions clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp

Large diffs are not rendered by default.

2,458 changes: 3 additions & 2,455 deletions clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp

Large diffs are not rendered by default.

9,373 changes: 7 additions & 9,366 deletions clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp

Large diffs are not rendered by default.

2,524 changes: 4 additions & 2,520 deletions clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp

Large diffs are not rendered by default.

2,292 changes: 2 additions & 2,290 deletions clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp

Large diffs are not rendered by default.

1,688 changes: 3 additions & 1,685 deletions clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp

Large diffs are not rendered by default.

465 changes: 1 addition & 464 deletions clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp

Large diffs are not rendered by default.

1,407 changes: 3 additions & 1,404 deletions clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

13,274 changes: 8 additions & 13,266 deletions clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp

Large diffs are not rendered by default.

4,552 changes: 16 additions & 4,536 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp

Large diffs are not rendered by default.

2,361 changes: 8 additions & 2,353 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp

Large diffs are not rendered by default.

5,289 changes: 8 additions & 5,281 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp

Large diffs are not rendered by default.

3,320 changes: 6 additions & 3,314 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp

Large diffs are not rendered by default.

7,462 changes: 8 additions & 7,454 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp

Large diffs are not rendered by default.

3,494 changes: 8 additions & 3,486 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp

Large diffs are not rendered by default.

3,513 changes: 5 additions & 3,508 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp

Large diffs are not rendered by default.

2,658 changes: 6 additions & 2,652 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1,693 changes: 6 additions & 1,687 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp

Large diffs are not rendered by default.

15,712 changes: 12 additions & 15,700 deletions clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp

Large diffs are not rendered by default.

1,209 changes: 3 additions & 1,206 deletions clang/test/OpenMP/teams_distribute_private_codegen.cpp

Large diffs are not rendered by default.

815 changes: 3 additions & 812 deletions clang/test/OpenMP/teams_distribute_reduction_codegen.cpp

Large diffs are not rendered by default.

4,987 changes: 24 additions & 4,963 deletions clang/test/OpenMP/teams_distribute_simd_codegen.cpp

Large diffs are not rendered by default.

1,757 changes: 8 additions & 1,749 deletions clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp

Large diffs are not rendered by default.

3,529 changes: 8 additions & 3,521 deletions clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp

Large diffs are not rendered by default.

2,656 changes: 6 additions & 2,650 deletions clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp

Large diffs are not rendered by default.

2,638 changes: 8 additions & 2,630 deletions clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp

Large diffs are not rendered by default.

2,144 changes: 6 additions & 2,138 deletions clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp

Large diffs are not rendered by default.

1,066 changes: 6 additions & 1,060 deletions clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp

Large diffs are not rendered by default.

2,511 changes: 6 additions & 2,505 deletions clang/test/OpenMP/teams_firstprivate_codegen.cpp

Large diffs are not rendered by default.

1,342 changes: 4 additions & 1,338 deletions clang/test/OpenMP/teams_private_codegen.cpp

Large diffs are not rendered by default.

18 changes: 17 additions & 1 deletion llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
Expand Down Expand Up @@ -6241,6 +6242,17 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);

// Loop information for F as provided by the Attributor's information cache.
// NOTE(review): the cache may hand back a null pointer when the analysis is
// not available, so LI is never dereferenced without a check below.
LoopInfo *LI =
    A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F);
// Result of mayContainIrreducibleControl(*F, LI); computed on first use and
// then reused for every later query.
Optional<bool> MayContainIrreducibleControl;
// Conservatively answers whether BB may be part of a cycle. Returns true if
// loop information is missing, if F may contain irreducible control flow, or
// if LoopInfo assigns BB to a loop; returns false otherwise.
auto IsInLoop = [&](BasicBlock &BB) {
  // Without loop information nothing can be proven about BB; report "in loop"
  // so the caller keeps the conservative placement.
  if (!LI)
    return true;
  if (!MayContainIrreducibleControl.has_value())
    MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI);
  if (MayContainIrreducibleControl.value())
    return true;
  return LI->getLoopFor(&BB) != nullptr;
};

for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
Expand Down Expand Up @@ -6282,6 +6294,10 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Size = SizeOffsetPair.first;
}

Instruction *IP = (!SizeAPI.has_value() || IsInLoop(*AI.CB->getParent()))
? AI.CB
: &F->getEntryBlock().front();

Align Alignment(1);
if (MaybeAlign RetAlign = AI.CB->getRetAlign())
Alignment = std::max(Alignment, *RetAlign);
Expand All @@ -6296,7 +6312,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
// TODO: Hoist the remaining allocas (those still inserted at the call site) towards the function entry.
unsigned AS = DL.getAllocaAddrSpace();
Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
Size, Alignment, "", AI.CB);
Size, Alignment, "", IP);

if (Alloca->getType() != AI.CB->getType())
Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,35 @@ define void @test17b() {
ret void
}

; The constant-size (i64 4) @__kmpc_alloc_shared call below sits in %not_entry,
; a block that is reached once from %entry and is not part of any loop. The NPM
; check lines expect the replacement alloca to be emitted in the entry block,
; ahead of the branch, rather than at the call site; the OPM check lines expect
; the runtime alloc/free calls to remain in place.
define void @move_alloca() {
; IS________OPM-LABEL: define {{[^@]+}}@move_alloca() {
; IS________OPM-NEXT: entry:
; IS________OPM-NEXT: br label [[NOT_ENTRY:%.*]]
; IS________OPM: not_entry:
; IS________OPM-NEXT: [[TMP0:%.*]] = tail call noalias i8* @__kmpc_alloc_shared(i64 noundef 4)
; IS________OPM-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[TMP0]]) #[[ATTR6]]
; IS________OPM-NEXT: tail call void @__kmpc_free_shared(i8* noalias nocapture [[TMP0]], i64 noundef 4)
; IS________OPM-NEXT: ret void
;
; IS________NPM-LABEL: define {{[^@]+}}@move_alloca() {
; IS________NPM-NEXT: entry:
; IS________NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1, addrspace(5)
; IS________NPM-NEXT: br label [[NOT_ENTRY:%.*]]
; IS________NPM: not_entry:
; IS________NPM-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8*
; IS________NPM-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR6]]
; IS________NPM-NEXT: ret void
;
entry:
br label %not_entry

not_entry:
%0 = tail call noalias i8* @__kmpc_alloc_shared(i64 4)
tail call void @usei8(i8* nocapture nofree %0) willreturn nounwind nosync
tail call void @__kmpc_free_shared(i8* %0, i64 4)
ret void
}


;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind willreturn }
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3978,16 +3978,16 @@ define dso_local void @test_nested_memory(float* %dst, double* %src) {
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test_nested_memory
; IS__TUNIT_NPM-SAME: (float* nocapture nofree writeonly [[DST:%.*]], double* nocapture nofree readonly [[SRC:%.*]]) {
; IS__TUNIT_NPM-NEXT: entry:
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 24, align 1
; IS__TUNIT_NPM-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast %struct.STy* [[LOCAL]] to i8*
; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = bitcast %struct.STy* [[LOCAL]] to i8*
; IS__TUNIT_NPM-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], %struct.STy* [[LOCAL]], i64 0, i32 2
; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 24, align 1
; IS__TUNIT_NPM-NEXT: [[DST1:%.*]] = bitcast i8* [[TMP1]] to float**
; IS__TUNIT_NPM-NEXT: [[DST1:%.*]] = bitcast i8* [[TMP0]] to float**
; IS__TUNIT_NPM-NEXT: store float* [[DST]], float** [[DST1]], align 8
; IS__TUNIT_NPM-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 8
; IS__TUNIT_NPM-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 8
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC2]] to double**
; IS__TUNIT_NPM-NEXT: store double* [[SRC]], double** [[TMP2]], align 8
; IS__TUNIT_NPM-NEXT: store i8* [[TMP1]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8
; IS__TUNIT_NPM-NEXT: store i8* [[TMP0]], i8** bitcast (%struct.STy** getelementptr inbounds ([[STRUCT_STY]], %struct.STy* @global, i64 0, i32 2) to i8**), align 8
; IS__TUNIT_NPM-NEXT: call fastcc void @nested_memory_callee() #[[ATTR15:[0-9]+]]
; IS__TUNIT_NPM-NEXT: ret void
;
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/OpenMP/spmdization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -702,8 +702,8 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8*
; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32*
; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
Expand All @@ -725,8 +725,8 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32*
; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
Expand All @@ -747,8 +747,8 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8*
; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32*
; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
Expand All @@ -770,8 +770,8 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32*
; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
Expand Down