128 changes: 64 additions & 64 deletions clang/test/OpenMP/nvptx_SPMD_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,31 @@
int a;

// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds
// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds
// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds
// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds
// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds
// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds
// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1

void foo() {
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
#pragma omp target teams distribute parallel for simd if(a)
Expand All @@ -60,25 +60,25 @@ void foo() {
for (int i = 0; i < 10; ++i)
;
int a;
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
#pragma omp target teams distribute parallel for lastprivate(a)
Expand All @@ -102,25 +102,25 @@ int a;
#pragma omp target teams distribute parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init(
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
#pragma omp target teams
Expand Down Expand Up @@ -162,25 +162,25 @@ int a;
#pragma omp distribute parallel for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
#pragma omp target teams
Expand Down Expand Up @@ -211,25 +211,25 @@ int a;
#pragma omp distribute parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[DISTR_FULL]]
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
#pragma omp target
#pragma omp teams
Expand Down Expand Up @@ -266,19 +266,19 @@ int a;
#pragma omp distribute parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
#pragma omp target parallel for if(a)
for (int i = 0; i < 10; ++i)
Expand All @@ -301,25 +301,25 @@ int a;
#pragma omp target parallel for schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
#pragma omp target parallel if(a)
Expand Down Expand Up @@ -350,25 +350,25 @@ int a;
#pragma omp for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK-DAG: [[BAR_FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
#pragma omp target
#pragma omp parallel
Expand Down Expand Up @@ -405,17 +405,17 @@ int a;
#pragma omp for simd schedule(guided)
for (int i = 0; i < 10; ++i)
;
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-DAG: [[FULL]]
#pragma omp target
#pragma omp parallel for
Expand Down
24 changes: 12 additions & 12 deletions clang/test/OpenMP/nvptx_data_sharing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ void test_ds(){
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
Expand All @@ -49,14 +49,14 @@ void test_ds(){
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK-NEXT: store i32 100, ptr [[B]], align 4
// CHECK-NEXT: store i32 1000, ptr [[C]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0
// CHECK-NEXT: store ptr [[B]], ptr [[TMP5]], align 8
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1
// CHECK-NEXT: store ptr [[A]], ptr [[TMP7]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0
// CHECK-NEXT: store ptr [[B]], ptr [[TMP3]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1
// CHECK-NEXT: store ptr [[A]], ptr [[TMP4]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
Expand Down Expand Up @@ -89,8 +89,8 @@ void test_ds(){
// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]]
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
// CHECK-NEXT: ret void
//
//
Expand Down Expand Up @@ -129,9 +129,9 @@ void test_ds(){
// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8
// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR4]]
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR4]]
// CHECK-NEXT: ret void
//
358 changes: 179 additions & 179 deletions clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp

Large diffs are not rendered by default.

1,032 changes: 516 additions & 516 deletions clang/test/OpenMP/nvptx_lambda_capturing.cpp

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ int main() {
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand All @@ -56,20 +56,20 @@ int main() {
// CHECK1-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -105,13 +105,13 @@ int main() {
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand All @@ -132,20 +132,20 @@ int main() {
// CHECK2-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK2-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down
40 changes: 20 additions & 20 deletions clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ int main() {
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
Expand All @@ -49,7 +49,7 @@ int main() {
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand All @@ -60,11 +60,11 @@ int main() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: ret void
//
//
Expand Down Expand Up @@ -95,8 +95,8 @@ int main() {
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: ret void
//
//
Expand Down Expand Up @@ -126,8 +126,8 @@ int main() {
// CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]]
// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK1: atomic_cont:
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4
// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]]
// CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
Expand All @@ -148,8 +148,8 @@ int main() {
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
Expand All @@ -160,7 +160,7 @@ int main() {
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
Expand All @@ -169,7 +169,7 @@ int main() {
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand All @@ -180,11 +180,11 @@ int main() {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: ret void
//
//
Expand Down Expand Up @@ -215,8 +215,8 @@ int main() {
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: ret void
//
//
Expand Down Expand Up @@ -246,8 +246,8 @@ int main() {
// CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]]
// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
// CHECK2: atomic_cont:
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4
// CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]]
// CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
Expand All @@ -268,7 +268,7 @@ int main() {
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4]]
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
88 changes: 44 additions & 44 deletions clang/test/OpenMP/nvptx_parallel_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,18 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0)
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -192,7 +192,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
Expand All @@ -201,19 +201,19 @@ int bar(int n){
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000
// CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP7]] to i32
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16
// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -255,7 +255,7 @@ int bar(int n){
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
Expand All @@ -266,11 +266,11 @@ int bar(int n){
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -331,8 +331,8 @@ int bar(int n){
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR3]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR3]]
// CHECK1-NEXT: ret void
//
//
Expand All @@ -344,18 +344,18 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0)
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -455,7 +455,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
Expand All @@ -464,19 +464,19 @@ int bar(int n){
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000
// CHECK2-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1
// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16
// CHECK2-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK2-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -518,7 +518,7 @@ int bar(int n){
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
Expand All @@ -529,11 +529,11 @@ int bar(int n){
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -594,7 +594,7 @@ int bar(int n){
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: ret void
//
20 changes: 10 additions & 10 deletions clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ int bar(int n){
// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
Expand All @@ -51,15 +51,15 @@ int bar(int n){
// CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK-NEXT: store ptr [[D]], ptr [[TMP6]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK-NEXT: store ptr [[D]], ptr [[TMP5]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 3
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
Expand Down Expand Up @@ -168,9 +168,9 @@ int bar(int n){
// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8
// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: ret void
//
298 changes: 149 additions & 149 deletions clang/test/OpenMP/nvptx_target_codegen.cpp

Large diffs are not rendered by default.

48 changes: 24 additions & 24 deletions clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -103,19 +103,19 @@ int bar(int n){
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -159,15 +159,15 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -204,19 +204,19 @@ int bar(int n){
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down
48 changes: 24 additions & 24 deletions clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,15 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -100,20 +100,20 @@ int bar(int n){
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -157,15 +157,15 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -204,20 +204,20 @@ int bar(int n){
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down
12 changes: 6 additions & 6 deletions clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ int bar(int n){

// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}(
//
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
//
//
// define internal void [[PFN]](
Expand Down Expand Up @@ -233,8 +233,8 @@ int bar(int n){

// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}(
//
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
//
//
// define internal void [[PFN1]](
Expand Down Expand Up @@ -490,8 +490,8 @@ int bar(int n){

// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}(
//
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
//
//
// define internal void [[PFN2]](
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ void test() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]]
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -527,15 +527,15 @@ void test() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down
24 changes: 12 additions & 12 deletions clang/test/OpenMP/nvptx_target_printf_codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[FMT:%.*]] = alloca ptr, align 8
// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
Expand All @@ -61,7 +61,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2
// CHECK-64-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
Expand All @@ -70,12 +70,12 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25
// CHECK-64-SAME: () #[[ATTR0]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
Expand All @@ -87,7 +87,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-NEXT: [[FOO_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8
// CHECK-64-NEXT: store i64 [[FOO]], ptr [[FOO_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
Expand All @@ -102,7 +102,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64: if.end:
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
//
//
Expand All @@ -114,7 +114,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[FMT:%.*]] = alloca ptr, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
Expand All @@ -127,7 +127,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2
// CHECK-32-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8
// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
Expand All @@ -136,12 +136,12 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25
// CHECK-32-SAME: () #[[ATTR0]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
Expand All @@ -153,7 +153,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-NEXT: [[FOO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8
// CHECK-32-NEXT: store i32 [[FOO]], ptr [[FOO_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
Expand All @@ -168,6 +168,6 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32: if.end:
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
//
16 changes: 8 additions & 8 deletions clang/test/OpenMP/nvptx_target_simd_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,32 +61,32 @@ int bar(int n){
}

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l32}}(
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-NOT: call void @__kmpc_for_static_init
// CHECK-NOT: call void @__kmpc_for_static_fini
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: ret void

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l37}}(
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-NOT: call void @__kmpc_for_static_init
// CHECK-NOT: call void @__kmpc_for_static_fini
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: ret void

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l42}}(
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-NOT: call void @__kmpc_for_static_init
// CHECK-NOT: call void @__kmpc_for_static_fini
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: ret void

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l47}}(
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK-NOT: call void @__kmpc_for_static_init
// CHECK-NOT: call void @__kmpc_for_static_fini
// CHECK-NOT: call void @__kmpc_nvptx_end_reduce_nowait(
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)
// CHECK: ret void

#endif
52 changes: 26 additions & 26 deletions clang/test/OpenMP/nvptx_target_teams_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
Expand All @@ -68,7 +68,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand All @@ -95,7 +95,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
Expand All @@ -106,7 +106,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand All @@ -133,18 +133,18 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand All @@ -162,9 +162,9 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: ret void
//
//
Expand All @@ -181,9 +181,9 @@ int bar(int n){
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: ret void
//
//
Expand All @@ -209,7 +209,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
Expand All @@ -220,7 +220,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand All @@ -247,7 +247,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
Expand All @@ -258,7 +258,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand All @@ -285,18 +285,18 @@ int bar(int n){
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand All @@ -314,9 +314,9 @@ int bar(int n){
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: ret void
//
//
Expand All @@ -333,9 +333,9 @@ int bar(int n){
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: ret void
//
//
Expand Down
24 changes: 12 additions & 12 deletions clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ int bar(int n){
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -104,8 +104,8 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
Expand Down Expand Up @@ -145,8 +145,8 @@ int bar(int n){
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR5]]
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR5]]
// CHECK1-NEXT: ret void
//
//
Expand All @@ -155,15 +155,15 @@ int bar(int n){
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -222,8 +222,8 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
Expand Down Expand Up @@ -263,7 +263,7 @@ int bar(int n){
// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR5]]
// CHECK2-NEXT: ret void
//
Loading