Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
Expand All @@ -57,7 +57,7 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -160,7 +160,7 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
Expand Down Expand Up @@ -341,11 +341,11 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
Expand All @@ -355,7 +355,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -456,7 +456,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
Expand Down

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,33 +70,33 @@ int bar(int n){
}

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l37(
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)

// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: ret void

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43(
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)

// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: ret void

// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48(
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)

// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_distribute_static_fini(
// CHECK: ret void

// CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32|64}} [[F_IN:%.+]])
// CHECK: store {{.+}} [[F_IN]], ptr {{.+}},
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true)
// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false)
// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2)

// CHECK: store {{.+}} 99, ptr [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, ptr [[COMB_UB]],
Expand Down
48 changes: 24 additions & 24 deletions clang/test/OpenMP/nvptx_teams_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
Expand All @@ -95,7 +95,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand All @@ -122,7 +122,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
Expand All @@ -134,7 +134,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand All @@ -161,7 +161,7 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
Expand All @@ -173,7 +173,7 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand All @@ -200,7 +200,7 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
Expand All @@ -212,7 +212,7 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -243,19 +243,19 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK3-NEXT: [[ARGC3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC3]], align 4
// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC3]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC3]], i64 4)
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
Expand Down Expand Up @@ -286,19 +286,19 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
// CHECK3-NEXT: [[ARGC2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC2]], align 8
// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC2]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC2]], i64 8)
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
Expand Down Expand Up @@ -329,7 +329,7 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
Expand All @@ -341,7 +341,7 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
Expand Down Expand Up @@ -372,7 +372,7 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true)
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
Expand All @@ -384,7 +384,7 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
Expand Down
3,012 changes: 1,506 additions & 1,506 deletions clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions clang/test/OpenMP/reduction_implicit_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ int main()
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
Expand All @@ -112,7 +112,7 @@ int main()
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void spmd(void) {
#pragma omp begin declare target device_type(nohost)
__attribute__((weak))
extern "C" int __kmpc_target_init(void *Ident, char Mode,
bool UseGenericStateMachine, bool) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
bool UseGenericStateMachine) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void spmd(void) {
#pragma omp begin declare target device_type(nohost)
__attribute__((weak))
extern "C" int __kmpc_target_init(void *Ident, char Mode,
bool UseGenericStateMachine, bool) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
bool UseGenericStateMachine) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target
Expand Down
590 changes: 295 additions & 295 deletions clang/test/OpenMP/target_has_device_addr_codegen.cpp

Large diffs are not rendered by default.

84 changes: 42 additions & 42 deletions clang/test/OpenMP/target_parallel_debug_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG47]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG47]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]]
// CHECK1: user_code.entry:
Expand All @@ -105,15 +105,15 @@ int main() {
// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2, i1 true), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG47]]
Expand Down Expand Up @@ -176,36 +176,36 @@ int main() {
// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG83]]
// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG84:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG85:![0-9]+]]
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG86:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG85]]
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG86:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64, !dbg [[DBG85]]
// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG85]]
// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG87:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG88:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG88]]
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG89:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG88]]
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG89:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG88]]
// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG88]]
// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG90:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG91]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG91]]
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG91]]
// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG91]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG91]]
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG91]]
// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG93:![0-9]+]]
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG94:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG93]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG94:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG93]]
// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG93]]
// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95:![0-9]+]]
// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG96:![0-9]+]]
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG97:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP15]] to i64, !dbg [[DBG96]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG97:![0-9]+]]
// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG96]]
// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG96]]
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG96]]
// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG98:![0-9]+]]
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG98]]
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG96]]
// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG98:![0-9]+]]
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP15]] to i1, !dbg [[DBG98]]
// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG98]]
// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP16]], !dbg [[DBG98]]
// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP14]], !dbg [[DBG98]]
// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG98]]
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL24]] to i8, !dbg [[DBG98]]
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG98]]
Expand Down Expand Up @@ -308,7 +308,7 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG137]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]]
// CHECK1: user_code.entry:
Expand All @@ -318,15 +318,15 @@ int main() {
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP17]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP19]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2, i1 true), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG137]]
Expand Down Expand Up @@ -517,21 +517,21 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]]
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG213]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP20]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]]
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG213]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG212]]
Expand Down
144 changes: 72 additions & 72 deletions clang/test/OpenMP/target_parallel_for_debug_codegen.cpp

Large diffs are not rendered by default.

404 changes: 202 additions & 202 deletions clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp

Large diffs are not rendered by default.

370 changes: 185 additions & 185 deletions clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp

Large diffs are not rendered by default.

634 changes: 317 additions & 317 deletions clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

124 changes: 62 additions & 62 deletions clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -499,37 +499,37 @@ int main() {
// CHECK1: omp_if.then:
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP17]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP22]] to i1
// CHECK1-NEXT: [[TMP23:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP16]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1
// CHECK1-NEXT: [[TMP21:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
// CHECK1-NEXT: store i32 1, ptr [[TMP24]], align 4
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
// CHECK1-NEXT: store i32 1, ptr [[TMP25]], align 4
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP27]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP28]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
// CHECK1-NEXT: store i64 100, ptr [[TMP32]], align 8
// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP23]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]])
// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
// CHECK1-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
// CHECK1-NEXT: store i32 1, ptr [[TMP23]], align 4
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP24]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP25]], align 8
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP27]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
// CHECK1-NEXT: store i64 100, ptr [[TMP30]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP21]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]])
// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
// CHECK1-NEXT: br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
// CHECK1: omp_offload.failed7:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP13]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]]
Expand All @@ -539,8 +539,8 @@ int main() {
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP13]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr @Arg, align 4
// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP35]])
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr @Arg, align 4
// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP33]])
// CHECK1-NEXT: ret i32 [[CALL]]
//
//
Expand Down Expand Up @@ -1037,37 +1037,37 @@ int main() {
// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP14]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP16]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP21]] to i1
// CHECK1-NEXT: [[TMP22:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1
// CHECK1-NEXT: [[TMP20:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
// CHECK1-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK1-NEXT: store i32 1, ptr [[TMP23]], align 4
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK1-NEXT: store i32 1, ptr [[TMP24]], align 4
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP25]], align 8
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP27]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP28]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK1-NEXT: store i64 100, ptr [[TMP31]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]])
// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
// CHECK1-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK1-NEXT: store i32 1, ptr [[TMP21]], align 4
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP23]], align 8
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP24]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP25]], align 8
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP26]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK1-NEXT: store i64 100, ptr [[TMP29]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP20]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]])
// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
// CHECK1-NEXT: br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK1: omp_offload.failed6:
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP13]]) #[[ATTR2]]
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT7]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,22 +167,22 @@ void gtid_test() {
// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]]
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

370 changes: 185 additions & 185 deletions clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp

Large diffs are not rendered by default.

950 changes: 475 additions & 475 deletions clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp

Large diffs are not rendered by default.

572 changes: 286 additions & 286 deletions clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp

Large diffs are not rendered by default.

2,200 changes: 1,100 additions & 1,100 deletions clang/test/OpenMP/target_teams_map_codegen.cpp

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1458,17 +1458,13 @@ class OpenMPIRBuilder {
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
bool RequiresFullRuntime);
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);

/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
bool RequiresFullRuntime);
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);

///}

Expand Down
4 changes: 2 additions & 2 deletions llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,8 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
/* Int */ Int32, /* kmp_task_t */ VoidPtr)

/// OpenMP Device runtime functions
__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1)
__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1)
__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1)
__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
Expand Down
14 changes: 4 additions & 10 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3854,8 +3854,7 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
bool RequiresFullRuntime) {
OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
if (!updateToLocation(Loc))
return Loc.IP;

Expand All @@ -3867,14 +3866,12 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *UseGenericStateMachine =
ConstantInt::getBool(Int32->getContext(), !IsSPMD);
ConstantInt *RequiresFullRuntimeVal =
ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);

Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_init);

CallInst *ThreadKind = Builder.CreateCall(
Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
Fn, {Ident, IsSPMDVal, UseGenericStateMachine});

Value *ExecUserCode = Builder.CreateICmpEQ(
ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
Expand Down Expand Up @@ -3908,8 +3905,7 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
}

void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
bool IsSPMD,
bool RequiresFullRuntime) {
bool IsSPMD) {
if (!updateToLocation(Loc))
return;

Expand All @@ -3919,13 +3915,11 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *RequiresFullRuntimeVal =
ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);

Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);

Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
Builder.CreateCall(Fn, {Ident, IsSPMDVal});
}

void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
Expand Down
21 changes: 2 additions & 19 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1863,7 +1863,8 @@ struct OpenMPOpt {
if (!UV || UV->size() + (ReplVal != nullptr) < 2)
return false;

LLVM_DEBUG(
F.dump();
(
dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
<< (ReplVal ? " with an existing value\n" : "\n") << "\n");

Expand Down Expand Up @@ -3290,8 +3291,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
constexpr const int InitModeArgNo = 1;
constexpr const int DeinitModeArgNo = 1;
constexpr const int InitUseStateMachineArgNo = 2;
constexpr const int InitRequiresFullRuntimeArgNo = 3;
constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
StateMachineSimplifyCB);
Expand All @@ -3301,14 +3300,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
ModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB,
InitRequiresFullRuntimeArgNo),
IsGenericModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelDeinitCB,
DeinitRequiresFullRuntimeArgNo),
IsGenericModeSimplifyCB);

// Check if we know we are in SPMD-mode already.
ConstantInt *ModeArg =
Expand Down Expand Up @@ -3702,8 +3693,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
const int InitModeArgNo = 1;
const int DeinitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
const int InitRequiresFullRuntimeArgNo = 3;
const int DeinitRequiresFullRuntimeArgNo = 2;

auto &Ctx = getAnchorValue().getContext();
A.changeUseAfterManifest(
Expand All @@ -3717,12 +3706,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
*ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
*ConstantInt::getBool(Ctx, false));
A.changeUseAfterManifest(
KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
*ConstantInt::getBool(Ctx, false));

++NumOpenMPTargetRegionKernelsSPMD;

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/OpenMP/always_inline_device.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK: Function Attrs: convergent norecurse nounwind
; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
Expand All @@ -24,13 +24,13 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: store i8 1, i8* @G, align 1
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

Expand All @@ -42,7 +42,7 @@ user_code.entry: ; preds = %entry
%isSPMD = call i8 @__kmpc_is_spmd_exec_mode()
store i8 %isSPMD, i8* @G
call void @bar() #2
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void

worker.exit: ; preds = %entry
Expand All @@ -51,9 +51,9 @@ worker.exit: ; preds = %entry

declare i8 @__kmpc_is_spmd_exec_mode()

declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8)

; Function Attrs: convergent nounwind
define hidden void @bar() #1 {
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/OpenMP/barrier_removal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ define void @pos_multiple() {
; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
Expand Down
184 changes: 102 additions & 82 deletions llvm/test/Transforms/OpenMP/custom_state_machines.ll

Large diffs are not rendered by default.

100 changes: 50 additions & 50 deletions llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ target triple = "nvptx64"
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18

Expand All @@ -77,12 +77,12 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
call void @unknown() #6, !dbg !27
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28
br label %common.ret
}

; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
ret i32 0
}

Expand All @@ -102,13 +102,13 @@ entry:
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr
declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr

; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33

Expand All @@ -133,7 +133,7 @@ user_code.entry: ; preds = %entry
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
call void @no_openmp()
call void @no_parallelism()
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46
br label %common.ret
}

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/OpenMP/deduplication_target.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,37 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 {
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false, i1 true)
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
%2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2)
ret void

worker.exit: ; preds = %entry
ret void
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)

declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #1

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8)

attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { nounwind }
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/OpenMP/fold_generic_main_thread.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false)
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
Expand All @@ -20,11 +20,11 @@ define void @kernel() {
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false)
; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false)
; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK-DISABLED: if.then:
Expand All @@ -34,10 +34,10 @@ define void @kernel() {
; CHECK-DISABLED-NEXT: call void @bar()
; CHECK-DISABLED-NEXT: br label [[IF_END]]
; CHECK-DISABLED: if.end:
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-DISABLED-NEXT: ret void
;
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 false)
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
Expand All @@ -47,7 +47,7 @@ if.else:
call void @bar()
br label %if.end
if.end:
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}

Expand Down Expand Up @@ -135,9 +135,9 @@ declare i8 @__kmpc_is_generic_main_thread_id(i32)

declare i32 @__kmpc_get_hardware_thread_id()

declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8)

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ target triple = "nvptx64"
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
Expand All @@ -28,15 +28,15 @@ define weak void @kernel0() #0 {
; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]]
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;

%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}

Expand All @@ -45,21 +45,21 @@ define weak void @kernel0() #0 {
define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
; CHECK: exit.threads:
; CHECK-NEXT: ret void
; CHECK: main.thread.user_code:
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;

%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @helper1()
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}

Expand All @@ -70,7 +70,7 @@ define weak void @kernel2() #0 {
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
Expand All @@ -82,12 +82,12 @@ define weak void @kernel2() #0 {
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -101,7 +101,7 @@ user_code.entry:
call void @helper1()
call void @helper2()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}

Expand Down Expand Up @@ -202,8 +202,8 @@ define internal i32 @__kmpc_get_hardware_num_threads_in_block() {
ret i32 %ret
}
declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) #1
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8) #1
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
declare i32 @__kmpc_global_thread_num(%struct.ident_t*)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ target triple = "nvptx64"
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true)
ret void
}

Expand All @@ -36,14 +36,14 @@ define weak void @kernel0() #0 {
define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
call void @helper1()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
ret void
}

Expand All @@ -52,18 +52,18 @@ define weak void @kernel1() #0 {
define weak void @kernel2() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false)
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
ret void
}

Expand Down Expand Up @@ -112,8 +112,8 @@ define internal void @helper2() {
}

declare i32 @__kmpc_get_hardware_num_threads_in_block()
declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) #1
declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext) #1


!llvm.module.flags = !{!0, !1}
Expand Down
27 changes: 19 additions & 8 deletions llvm/test/Transforms/OpenMP/global_constructor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

define weak void @__omp_offloading_fd02_85283c04_main_l11(double* nonnull align 8 dereferenceable(8) %X) local_unnamed_addr {
entry:
%0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false, i1 false) #0
%0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false) #0
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -29,13 +29,13 @@ region.guarded:

region.barrier:
tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @1, i32 %2)
tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2, i1 false) #0
tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2) #0
br label %common.ret
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr
declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr

define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
entry:
Expand Down Expand Up @@ -78,21 +78,32 @@ attributes #1 = { convergent nounwind }
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = load double, double* @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR2]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store double [[TMP1]], double* [[X]], align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR1]]
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR2]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
;
; CHECK: Function Attrs: norecurse
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]]
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
; CHECK-NEXT: store double [[DIV]], double* @_ZL6Device, align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: ret void
;
8 changes: 4 additions & 4 deletions llvm/test/Transforms/OpenMP/globalization_remarks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ target triple = "nvptx64"

define void @foo() {
entry:
%c = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true)
%c = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true)
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
%x_on_stack = bitcast i8* %0 to i32*
%1 = bitcast i32* %x_on_stack to i8*
call void @share(i8* %1), !dbg !10
call void @__kmpc_free_shared(i8* %0)
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
ret void
}

Expand All @@ -33,9 +33,9 @@ declare i8* @__kmpc_alloc_shared(i64)

declare void @__kmpc_free_shared(i8* nocapture)

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1);
declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1);

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i1)

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,22 +48,22 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void

worker.exit: ; preds = %entry
ret void
}

define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
ret i32 0
}

Expand Down Expand Up @@ -149,7 +149,7 @@ entry:

declare i32 @__kmpc_global_thread_num(%struct.ident_t*)

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8)

define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
entry:
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,26 @@ target triple = "nvptx64"
;.
define weak void @is_spmd() {
; CHECK-LABEL: define {{[^@]+}}@is_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @is_spmd_helper1()
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
call void @is_spmd_helper1()
call void @is_spmd_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
ret void
}

define weak void @will_be_spmd() {
; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
Expand All @@ -50,12 +50,12 @@ define weak void @will_be_spmd() {
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -67,41 +67,41 @@ user_code.entry:
%1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
call void @is_spmd_helper2()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}

define weak void @non_spmd() {
; CHECK-LABEL: define {{[^@]+}}@non_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}

define weak void @will_not_be_spmd() {
; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}

Expand Down Expand Up @@ -198,8 +198,8 @@ entry:

declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable"
declare i8 @__kmpc_is_spmd_exec_mode()
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext)
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext)
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8)
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
declare void @foo()
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/Transforms/OpenMP/parallel_level_fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,46 +19,46 @@ target triple = "nvptx64"
;.
define weak void @none_spmd() {
; CHECK-LABEL: define {{[^@]+}}@none_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: call void @none_spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
call void @none_spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
ret void
}

define weak void @spmd() {
; CHECK-LABEL: define {{[^@]+}}@spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
call void @spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
ret void
}

define weak void @parallel() {
; CHECK-LABEL: define {{[^@]+}}@parallel() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
call void @spmd_helper()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
ret void
}

Expand Down Expand Up @@ -130,8 +130,8 @@ define internal void @parallel_helper() {
declare void @foo()
declare void @bar()
declare i8 @__kmpc_parallel_level()
declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext, i1 zeroext) #1
declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext) #1

!llvm.module.flags = !{!0, !1}
!nvvm.annotations = !{!2, !3, !4}
Expand Down
32 changes: 21 additions & 11 deletions llvm/test/Transforms/OpenMP/remove_globalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,44 +17,52 @@ target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, i8* }

; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
ret i32 0
}
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
;.
; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
;.
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLED-NEXT: ret i32 0
;
ret i32 0
}
declare void @__kmpc_target_deinit(%struct.ident_t*, i8)

define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false)
; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-NEXT: call void @bar() #[[ATTR0]]
; CHECK-NEXT: call void @convert_and_move_alloca() #[[ATTR0]]
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false)
; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR0]]
; CHECK-DISABLED-NEXT: call void @convert_and_move_alloca() #[[ATTR0]]
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp()
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
; CHECK-DISABLED-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true, i1 true)
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true)
call void @foo()
call void @bar()
call void @convert_and_move_alloca()
call void @unknown_no_openmp()
call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
ret void
}

Expand Down Expand Up @@ -294,3 +302,5 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-DISABLED: [[META10:![0-9]+]] = !DISubroutineType(types: !2)
; CHECK-DISABLED: [[DBG11]] = !DILocation(line: 6, column: 2, scope: !9)
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-REMARKS: {{.*}}
36 changes: 22 additions & 14 deletions llvm/test/Transforms/OpenMP/replace_globalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,19 @@ target triple = "nvptx64"

define dso_local void @foo() "kernel" {
entry:
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
%x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
call void @unknown_no_openmp()
%x_on_stack = bitcast i8* %x to i32*
%0 = bitcast i32* %x_on_stack to i8*
call void @use(i8* %0)
call void @__kmpc_free_shared(i8* %x, i64 4)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
}

define void @bar() "kernel" {
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
call void @unknown_no_openmp()
%cmp = icmp eq i32 %c, -1
br i1 %cmp, label %master1, label %exit
Expand All @@ -60,12 +60,12 @@ master2:
call void @__kmpc_free_shared(i8* %y, i64 4)
br label %exit
exit:
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
ret void
}

define void @baz_spmd() "kernel" {
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true, i1 true)
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true)
call void @unknown_no_openmp()
%c0 = icmp eq i32 %c, -1
br i1 %c0, label %master3, label %exit
Expand All @@ -77,7 +77,7 @@ master3:
call void @__kmpc_free_shared(i8* %z, i64 24)
br label %exit
exit:
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2)
ret void
}

Expand Down Expand Up @@ -106,11 +106,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()

; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) {
define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
ret i32 0
}

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8)

declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"

Expand Down Expand Up @@ -147,18 +147,18 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @use.internalized(i8* nofree [[X]]) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
Expand All @@ -173,13 +173,13 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@baz_spmd
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true, i1 true)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
Expand All @@ -189,7 +189,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR6]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
;
;
Expand All @@ -215,6 +215,11 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([1024 x i8], [1024 x i8] addrspace(3)* @stack, i32 0, i32 0) to i8*), i32 [[L]]
; CHECK-NEXT: ret i8* [[GEP]]
;
;
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: ret i32 0
;
;.
; CHECK: attributes #[[ATTR0]] = { "kernel" }
; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind memory(write) }
Expand All @@ -238,3 +243,6 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META11:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !12, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
; CHECK: [[META12:![0-9]+]] = !DISubroutineType(types: !2)
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-LIMIT: {{.*}}
; CHECK-REMARKS: {{.*}}
Loading