42 changes: 21 additions & 21 deletions clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,19 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E_ADDR]], align 8
// CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK1-NEXT: store double [[TMP1]], ptr [[E1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -328,7 +328,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
Expand All @@ -344,7 +344,7 @@ int bar(int n){
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4)
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -677,15 +677,15 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
Expand Down Expand Up @@ -1250,17 +1250,17 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8
// CHECK2-NEXT: store double [[TMP3]], ptr [[E1]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -1519,7 +1519,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
Expand All @@ -1535,7 +1535,7 @@ int bar(int n){
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -1868,15 +1868,15 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
Expand Down Expand Up @@ -2441,17 +2441,17 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment)
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8
// CHECK3-NEXT: store double [[TMP3]], ptr [[E1]], align 8
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
Expand Down Expand Up @@ -2710,7 +2710,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment)
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
Expand All @@ -2726,7 +2726,7 @@ int bar(int n){
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
Expand Down Expand Up @@ -3059,15 +3059,15 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
Expand Down
6 changes: 3 additions & 3 deletions clang/test/OpenMP/reduction_implicit_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,16 @@ int main()
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_kernel_environment)
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ void spmd(void) {
}

#pragma omp begin declare target device_type(nohost)
struct KernelEnvironmentTy;
__attribute__((weak))
extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
__attribute__((weak))
extern "C" int __kmpc_target_init(void *Ident, char Mode,
bool UseGenericStateMachine) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target
Expand Down
6 changes: 3 additions & 3 deletions clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ void spmd(void) {
}

#pragma omp begin declare target device_type(nohost)
struct KernelEnvironmentTy;
__attribute__((weak))
extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
__attribute__((weak))
extern "C" int __kmpc_target_init(void *Ident, char Mode,
bool UseGenericStateMachine) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target
Expand Down
20 changes: 10 additions & 10 deletions clang/test/OpenMP/target_parallel_debug_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG47]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment), !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG47]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]]
// CHECK1: user_code.entry:
Expand All @@ -113,7 +113,7 @@ int main() {
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG47]]
Expand Down Expand Up @@ -308,11 +308,11 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment), !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG137]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]])
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG138:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]]
Expand All @@ -325,8 +325,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB7]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG137]]
Expand Down Expand Up @@ -517,11 +517,11 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_kernel_environment), !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB11:[0-9]+]])
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]]
Expand All @@ -530,8 +530,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB11]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG212]]
Expand Down
28 changes: 14 additions & 14 deletions clang/test/OpenMP/target_parallel_for_debug_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment), !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]]
// CHECK1: user_code.entry:
Expand All @@ -110,7 +110,7 @@ int main() {
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]]
// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG41]]
Expand Down Expand Up @@ -383,11 +383,11 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment), !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]])
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]]
Expand All @@ -400,8 +400,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG146]]
Expand Down Expand Up @@ -466,7 +466,7 @@ int main() {
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
Expand Down Expand Up @@ -559,7 +559,7 @@ int main() {
// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]]
//
//
Expand Down Expand Up @@ -665,11 +665,11 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment), !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]])
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]]
Expand All @@ -678,8 +678,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG236]]
Expand Down Expand Up @@ -749,7 +749,7 @@ int main() {
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
Expand Down Expand Up @@ -846,7 +846,7 @@ int main() {
// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]]
//
//
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1721,7 +1721,8 @@ class OpenMPIRBuilder {
/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
void createTargetDeinit(const LocationDescription &Loc);
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);

///}

Expand Down
13 changes: 4 additions & 9 deletions llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,6 @@ __OMP_STRUCT_TYPE(KernelArgs, __tgt_kernel_arguments, false, Int32, Int32, VoidP
Int64, Int64, Int32Arr3Ty, Int32Arr3Ty, Int32)
__OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr)
__OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8)
__OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false,
Int8, Int8, Int8)
__OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16)
__OMP_STRUCT_TYPE(KernelEnvironment, KernelEnvironmentTy, false,
ConfigurationEnvironment, IdentPtr, DynamicEnvironmentPtr)

#undef __OMP_STRUCT_TYPE
#undef OMP_STRUCT_TYPE
Expand Down Expand Up @@ -457,8 +452,8 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
/* Int */ Int32, /* kmp_task_t */ VoidPtr)

/// OpenMP Device runtime functions
__OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr)
__OMP_RTL(__kmpc_target_deinit, false, Void,)
__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1)
__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
Expand Down Expand Up @@ -1017,9 +1012,9 @@ __OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs,
ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs, SExt))

__OMP_RTL_ATTRS(__kmpc_target_init, AttributeSet(), SExt,
ParamAttrs(AttributeSet()))
ParamAttrs(AttributeSet(), SExt, SExt))
__OMP_RTL_ATTRS(__kmpc_target_deinit, AttributeSet(), AttributeSet(),
ParamAttrs())
ParamAttrs(AttributeSet(), SExt))
__OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(),
ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt,
AttributeSet(), AttributeSet(), AttributeSet(),
Expand Down
69 changes: 15 additions & 54 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
Expand Down Expand Up @@ -3899,60 +3898,14 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *UseGenericStateMachineVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()), !IsSPMD);
ConstantInt *MayUseNestedParallelismVal =
ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), true);
ConstantInt *DebugIndentionLevelVal =
ConstantInt::getSigned(IntegerType::getInt16Ty(Int8->getContext()), 0);

// We need to strip the debug prefix to get the correct kernel name.
Function *Kernel = Builder.GetInsertBlock()->getParent();
StringRef KernelName = Kernel->getName();
const std::string DebugPrefix = "_debug__";
if (KernelName.ends_with(DebugPrefix))
KernelName = KernelName.drop_back(DebugPrefix.length());
ConstantInt *UseGenericStateMachine =
ConstantInt::getBool(Int32->getContext(), !IsSPMD);

Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_init);
const DataLayout &DL = Fn->getParent()->getDataLayout();

Twine DynamicEnvironmentName = KernelName + "_dynamic_environment";
Constant *DynamicEnvironmentInitializer =
ConstantStruct::get(DynamicEnvironment, {DebugIndentionLevelVal});
Constant *DynamicEnvironmentGV = new GlobalVariable(
M, DynamicEnvironment, /* IsConstant */ false,
GlobalValue::InternalLinkage, DynamicEnvironmentInitializer,
DynamicEnvironmentName,
/* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal,
DL.getDefaultGlobalsAddressSpace());
if (DynamicEnvironmentGV->getType() != DynamicEnvironmentPtr)
DynamicEnvironmentGV = ConstantExpr::getAddrSpaceCast(
DynamicEnvironmentGV, DynamicEnvironmentPtr);

Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get(
ConfigurationEnvironment, {
UseGenericStateMachineVal,
MayUseNestedParallelismVal,
IsSPMDVal,
});
Constant *KernelEnvironmentInitializer = ConstantStruct::get(
KernelEnvironment, {
ConfigurationEnvironmentInitializer,
Ident,
DynamicEnvironmentGV,
});
Twine KernelEnvironmentName = KernelName + "_kernel_environment";
Constant *KernelEnvironmentGV = new GlobalVariable(
M, KernelEnvironment, /* IsConstant */ true, GlobalValue::ExternalLinkage,
KernelEnvironmentInitializer, KernelEnvironmentName,
/* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal,
DL.getDefaultGlobalsAddressSpace());
if (KernelEnvironmentGV->getType() != KernelEnvironmentPtr)
KernelEnvironmentGV = ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV,
KernelEnvironmentPtr);

CallInst *ThreadKind = Builder.CreateCall(Fn, {KernelEnvironmentGV});

CallInst *ThreadKind = Builder.CreateCall(
Fn, {Ident, IsSPMDVal, UseGenericStateMachine});

Value *ExecUserCode = Builder.CreateICmpEQ(
ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
Expand Down Expand Up @@ -3985,14 +3938,22 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
}

void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc) {
void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
bool IsSPMD) {
if (!updateToLocation(Loc))
return;

uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);

Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);

Builder.CreateCall(Fn, {});
Builder.CreateCall(Fn, {Ident, IsSPMDVal});
}

void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
Expand Down
339 changes: 111 additions & 228 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
@_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef

define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
%1 = tail call i32 @__kmpc_target_init(ptr null)
%1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
ret void
}

define internal i32 @__kmpc_target_init(ptr %0) {
define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) {
store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
%2 = call i1 @__kmpc_kernel_parallel()
%4 = call i1 @__kmpc_kernel_parallel()
ret i32 0
}

Expand All @@ -29,13 +29,13 @@ define internal i1 @__kmpc_kernel_parallel() {
; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
;.
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null)
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: norecurse nosync nounwind memory(write)
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: ret i32 0
;
;.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ entry:
define weak_odr void @t3() {
; CHECK-LABEL: define {{[^@]+}}@t3() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noalias nocapture noundef align 4294967296 null)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noalias nocapture noundef align 4294967296 null, i8 noundef 0, i1 noundef false, i1 noundef false)
; CHECK-NEXT: br label [[USER_CODE_ENTRY:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
Expand All @@ -321,7 +321,7 @@ define weak_odr void @t3() {
; CHECK-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(ptr null)
%0 = call i32 @__kmpc_target_init(ptr null, i8 0, i1 false, i1 false)
br label %user_code.entry

user_code.entry: ; preds = %entry
Expand All @@ -335,7 +335,7 @@ for.body: ; preds = %for.cond
ret void
}

declare i32 @__kmpc_target_init(ptr)
declare i32 @__kmpc_target_init(ptr, i8, i1, i1)

define %S.2 @t3.helper() {
; CHECK-LABEL: define {{[^@]+}}@t3.helper() {
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/OpenMP/add_attributes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -736,9 +736,9 @@ declare i32 @__kmpc_shuffle_int32(i32, i16, i16);

declare i64 @__kmpc_shuffle_int64(i64, i16, i16);

declare void @__kmpc_target_deinit();
declare void @__kmpc_target_deinit(ptr, i8);

declare i32 @__kmpc_target_init(ptr);
declare i32 @__kmpc_target_init(ptr, i8, i1);

declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32);

Expand Down Expand Up @@ -1389,10 +1389,10 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr,
; CHECK: declare i64 @__kmpc_shuffle_int64(i64, i16, i16)

; CHECK-NOT: Function Attrs
; CHECK: declare void @__kmpc_target_deinit()
; CHECK: declare void @__kmpc_target_deinit(ptr, i8)

; CHECK-NOT: Function Attrs
; CHECK: declare i32 @__kmpc_target_init(ptr)
; CHECK: declare i32 @__kmpc_target_init(ptr, i8, i1)

; CHECK-NOT: Function Attrs
; CHECK: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
Expand Down Expand Up @@ -2037,10 +2037,10 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr,
; OPTIMISTIC: declare i64 @__kmpc_shuffle_int64(i64, i16, i16)

; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare void @__kmpc_target_deinit()
; OPTIMISTIC: declare void @__kmpc_target_deinit(ptr, i8)

; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr)
; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr, i8, i1)

; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
Expand Down Expand Up @@ -2701,7 +2701,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr,
; EXT: declare void @__kmpc_target_deinit(ptr, i8 signext)

; EXT-NOT: Function Attrs
; EXT: declare signext i32 @__kmpc_target_init(ptr)
; EXT: declare signext i32 @__kmpc_target_init(ptr, i8 signext, i1 signext)

; EXT-NOT: Function Attrs
; EXT: declare void @__tgt_interop_destroy(ptr, i32 signext, ptr, i32 signext, i32 signext, ptr, i32 signext)
Expand Down
42 changes: 10 additions & 32 deletions llvm/test/Transforms/OpenMP/always_inline_device.ll
Original file line number Diff line number Diff line change
@@ -1,27 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes
; RUN: opt < %s -S -passes=openmp-opt -openmp-opt-inline-device | FileCheck %s

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode = weak constant i8 1
@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata"
@G = external global i8

@kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }

; Function Attrs: convergent norecurse nounwind
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
; CHECK: @[[KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
;.
; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK: Function Attrs: norecurse nounwind
; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_environment)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
Expand All @@ -32,13 +24,13 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: store i8 1, ptr @G, align 1
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(ptr @kernel_environment)
%0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

Expand All @@ -50,7 +42,7 @@ user_code.entry: ; preds = %entry
%isSPMD = call i8 @__kmpc_is_spmd_exec_mode()
store i8 %isSPMD, ptr @G
call void @bar() #2
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void

worker.exit: ; preds = %entry
Expand All @@ -59,9 +51,9 @@ worker.exit: ; preds = %entry

declare i8 @__kmpc_is_spmd_exec_mode()

declare i32 @__kmpc_target_init(ptr)
declare i32 @__kmpc_target_init(ptr, i8, i1)

declare void @__kmpc_target_deinit()
declare void @__kmpc_target_deinit(ptr, i8)

; Function Attrs: convergent nounwind
define hidden void @bar() #1 {
Expand Down Expand Up @@ -91,17 +83,3 @@ attributes #2 = { convergent }
!5 = !{i32 7, !"PIC Level", i32 2}
!6 = !{i32 7, !"frame-pointer", i32 2}
!7 = !{!"clang version 14.0.0"}
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline convergent nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0}
; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; CHECK: [[META7:![0-9]+]] = !{!"clang version 14.0.0"}
;.
422 changes: 104 additions & 318 deletions llvm/test/Transforms/OpenMP/custom_state_machines.ll

Large diffs are not rendered by default.

5,853 changes: 1,407 additions & 4,446 deletions llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll

Large diffs are not rendered by default.

20 changes: 9 additions & 11 deletions llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,33 +36,31 @@ target triple = "nvptx64"
;; }

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }

@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@2 = private unnamed_addr constant [82 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_fallback;11;1;;\00", align 1
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8
@4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8
@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
@6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8
@8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1
@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8
@10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8
@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
@12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1
@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8
@G = external global i32
@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }
@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }

@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"

; Function Attrs: convergent norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment) #3, !dbg !18
%0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18

Expand All @@ -77,12 +75,12 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26
call void @unknown() #6, !dbg !27
call void @__kmpc_target_deinit() #3, !dbg !28
call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28
br label %common.ret
}

; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
define weak i32 @__kmpc_target_init(ptr) {
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
ret i32 0
}

Expand All @@ -101,13 +99,13 @@ entry:
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3

declare void @__kmpc_target_deinit() local_unnamed_addr
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr

; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment) #3, !dbg !33
%0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33

Expand All @@ -130,7 +128,7 @@ user_code.entry: ; preds = %entry
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45
call void @no_openmp()
call void @no_parallelism()
call void @__kmpc_target_deinit() #3, !dbg !46
call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46
br label %common.ret
}

Expand Down
17 changes: 8 additions & 9 deletions llvm/test/Transforms/OpenMP/deduplication_target.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,51 +5,50 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, ptr @0 }, align 8
@__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
@__omp_offloading_50_a3e09bf8_foo_l2_exec_mode = weak constant i8 0
@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode], section "llvm.metadata"

declare void @use(i32)

define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_50_a3e09bf8_foo_l2
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment)
%0 = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 false)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @2)
%2 = call i32 @__kmpc_global_thread_num(ptr @2)
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr @1, i8 2)
ret void

worker.exit: ; preds = %entry
ret void
}

declare i32 @__kmpc_target_init(ptr)
declare i32 @__kmpc_target_init(ptr, i8, i1)

declare i32 @__kmpc_global_thread_num(ptr) #1

declare void @__kmpc_target_deinit()
declare void @__kmpc_target_deinit(ptr, i8)

attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { nounwind }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,26 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"

%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@G = external global i32

@kernel0_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@kernel1_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@kernel2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@kernel0_exec_mode = weak constant i8 1

@G = external global i32
;.
; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32
; CHECK: @[[KERNEL0_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
; CHECK: @[[KERNEL1_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
; CHECK: @[[KERNEL2_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[KERNEL0_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[KERNEL1_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[KERNEL2_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
Expand All @@ -32,43 +31,49 @@ define weak void @kernel0() #0 {
; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]]
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment)

%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}

@kernel1_exec_mode = weak constant i8 1

define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
; CHECK: exit.threads:
; CHECK-NEXT: ret void
; CHECK: main.thread.user_code:
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment)

%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @helper1()
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}

@kernel2_exec_mode = weak constant i8 1

define weak void @kernel2() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
Expand All @@ -79,12 +84,12 @@ define weak void @kernel2() #0 {
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%i = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment)
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -97,7 +102,7 @@ user_code.entry:
call void @helper1()
call void @helper2()
call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}

Expand Down Expand Up @@ -193,8 +198,8 @@ define internal i32 @__kmpc_get_hardware_num_threads_in_block() {
ret i32 %ret
}
declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
declare i32 @__kmpc_target_init(ptr) #1
declare void @__kmpc_target_deinit() #1
declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext) #1
declare void @__kmpc_target_deinit(ptr nocapture readnone, i8) #1
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
declare i32 @__kmpc_global_thread_num(ptr)

Expand All @@ -209,6 +214,7 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
!2 = !{ptr @kernel0, !"kernel", i32 1}
!3 = !{ptr @kernel1, !"kernel", i32 1}
!4 = !{ptr @kernel2, !"kernel", i32 1}
;
;.
; CHECK: attributes #[[ATTR0]] = { "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
; CHECK: attributes #[[ATTR1]] = { nounwind }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,58 +2,68 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"

@G = external global i32
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@kernel0_exec_mode = weak constant i8 1

@G = external global i32
;.
; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32
; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
;.
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 true)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null)
%i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
call void @__kmpc_target_deinit(ptr null, i1 true)
ret void
}

@kernel1_exec_mode = weak constant i8 1

define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null)
%i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
call void @helper1()
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i1 false)
ret void
}

@kernel2_exec_mode = weak constant i8 1

define weak void @kernel2() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false)
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i1 false)
ret void
}

Expand Down Expand Up @@ -102,8 +112,8 @@ define internal void @helper2() {
}

declare i32 @__kmpc_get_hardware_num_threads_in_block()
declare i32 @__kmpc_target_init(ptr) #1
declare void @__kmpc_target_deinit() #1
declare i32 @__kmpc_target_init(ptr, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(ptr nocapture readnone, i1 zeroext) #1


!llvm.module.flags = !{!0, !1}
Expand Down
24 changes: 11 additions & 13 deletions llvm/test/Transforms/OpenMP/global_constructor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,15 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@_ZL6Device = internal global double 0.000000e+00, align 8
@__omp_offloading_fd02_85283c04_main_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
@__omp_offloading_fd02_85283c04_main_l11_exec_mode = weak constant i8 0

define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr {
entry:
%0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #0
%0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #0
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -31,13 +29,13 @@ region.guarded:

region.barrier:
tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @1, i32 %2)
tail call void @__kmpc_target_deinit() #0
tail call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #0
br label %common.ret
}

declare i32 @__kmpc_target_init(ptr) local_unnamed_addr
declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr

declare void @__kmpc_target_deinit() local_unnamed_addr
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr

define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
entry:
Expand Down Expand Up @@ -80,29 +78,29 @@ attributes #1 = { convergent nounwind }
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR0]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR0]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit() #[[ATTR0]]
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR1]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
;
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]]
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0]]
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: ret void
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/Transforms/OpenMP/globalization_remarks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,17 @@ target triple = "nvptx64"
; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.

%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@S = external local_unnamed_addr global ptr
@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }

define void @foo() {
entry:
%c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment)
%c = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true)
%0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10
call void @share(ptr %0), !dbg !10
call void @__kmpc_free_shared(ptr %0)
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i1 false)
ret void
}

Expand All @@ -33,9 +31,9 @@ declare ptr @__kmpc_alloc_shared(i64)

declare void @__kmpc_free_shared(ptr nocapture)

declare i32 @__kmpc_target_init(ptr);
declare i32 @__kmpc_target_init(ptr, i1, i1);

declare void @__kmpc_target_deinit()
declare void @__kmpc_target_deinit(ptr, i1)

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,35 +36,34 @@


%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_10301_87b2c_foo_l7_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@__omp_offloading_10301_87b2c_foo_l7_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_10301_87b2c_foo_l7_exec_mode], section "llvm.metadata"

define weak void @__omp_offloading_10301_87b2c_foo_l7() {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_10301_87b2c_foo_l7_kernel_environment)
%0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr)
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void

worker.exit: ; preds = %entry
ret void
}

define weak i32 @__kmpc_target_init(ptr %0) {
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
ret i32 0
}

Expand Down Expand Up @@ -147,7 +146,7 @@ entry:

declare i32 @__kmpc_global_thread_num(ptr)

declare void @__kmpc_target_deinit()
declare void @__kmpc_target_deinit(ptr, i8)

define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
entry:
Expand Down
57 changes: 29 additions & 28 deletions llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,45 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"

%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@is_spmd_exec_mode = weak constant i8 0
@will_be_spmd_exec_mode = weak constant i8 1
@non_spmd_exec_mode = weak constant i8 1
@will_not_be_spmd_exec_mode = weak constant i8 1
@G = external global i8
@is_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
@will_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@will_not_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@llvm.compiler.used = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode ], section "llvm.metadata"

;.
; CHECK: @[[IS_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[WILL_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[NON_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[WILL_NOT_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
; CHECK: @[[IS_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null }
; CHECK: @[[WILL_BE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
; CHECK: @[[NONE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
; CHECK: @[[WILL_NOT_BE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode], section "llvm.metadata"
;.
define weak void @is_spmd() {
; CHECK-LABEL: define {{[^@]+}}@is_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: call void @is_spmd_helper1()
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment)
%i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
call void @is_spmd_helper1()
call void @is_spmd_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 2)
ret void
}

define weak void @will_be_spmd() {
; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_be_spmd_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
Expand All @@ -48,12 +49,12 @@ define weak void @will_be_spmd() {
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%i = call i32 @__kmpc_target_init(ptr @will_be_spmd_kernel_environment)
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -64,41 +65,41 @@ user_code.entry:
%0 = call i32 @__kmpc_global_thread_num(ptr null)
call void @is_spmd_helper2()
call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}

define weak void @non_spmd() {
; CHECK-LABEL: define {{[^@]+}}@non_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}

define weak void @will_not_be_spmd() {
; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment)
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}

Expand Down Expand Up @@ -195,8 +196,8 @@ entry:

declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable"
declare i8 @__kmpc_is_spmd_exec_mode()
declare i32 @__kmpc_target_init(ptr)
declare void @__kmpc_target_deinit()
declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext)
declare void @__kmpc_target_deinit(ptr nocapture readnone, i8)
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
declare i32 @__kmpc_global_thread_num(ptr)
declare void @foo()
Expand Down
49 changes: 31 additions & 18 deletions llvm/test/Transforms/OpenMP/nested_parallelism.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,39 +23,39 @@
target triple = "nvptx64"

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
@__omp_offloading_10302_bd7e0_main_l13_exec_mode = weak protected constant i8 3
@__omp_offloading_10302_bd7e0_main_l16_exec_mode = weak protected constant i8 1
@i_shared = internal addrspace(3) global [4 x i8] undef, align 16
@i.i_shared = internal addrspace(3) global [4 x i8] undef, align 16

@__omp_offloading_10302_bd7e0_main_l13_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr @1, ptr null }
@__omp_offloading_10302_bd7e0_main_l16_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }

@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_10302_bd7e0_main_l13_exec_mode, ptr @__omp_offloading_10302_bd7e0_main_l16_exec_mode], section "llvm.metadata"

;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak protected constant i8 3
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak protected constant i8 3
; CHECK: @[[I_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 16
; CHECK: @[[I_I_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 16
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_10302_bd7e0_main_l13_exec_mode, ptr @__omp_offloading_10302_bd7e0_main_l16_exec_mode], section "llvm.metadata"
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l13(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment)
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: user_code.entry:
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]]
; CHECK: region.guarded.i:
Expand All @@ -67,12 +67,12 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 nounde
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2)
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs.i = alloca [1 x ptr], align 8
%0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment) #6
%0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #6
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -96,11 +96,11 @@ _Z3fooi.internalized.exit: ; preds = %user_code.entry, %r
store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr %captured_vars_addrs.i, align 8
call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
call void @__kmpc_target_deinit() #6
call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #6
br label %common.ret
}

declare i32 @__kmpc_target_init(ptr) local_unnamed_addr
declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr

define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-LABEL: @_Z3fooi(
Expand Down Expand Up @@ -131,7 +131,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(i64 nounde
; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment)
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
Expand All @@ -140,16 +140,29 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(i64 nounde
; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR1]]
; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
; CHECK: region.check.tid:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP2]])
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2)
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs.i = alloca [1 x ptr], align 8
%0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment) #6
%0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #6
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -164,7 +177,7 @@ user_code.entry: ; preds = %entry
store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr %captured_vars_addrs.i, align 8
call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
call void @__kmpc_target_deinit() #6
call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #6
br label %common.ret
}

Expand Down
48 changes: 26 additions & 22 deletions llvm/test/Transforms/OpenMP/parallel_level_fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,62 +2,66 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"

%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@no_spmd_exec_mode = weak constant i8 1
@spmd_exec_mode = weak constant i8 0
@parallel_exec_mode = weak constant i8 0
@G = external global i16
@none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
@parallel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
@llvm.compiler.used = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"

;.
; CHECK: @[[NO_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[PARALLEL_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i16
; CHECK: @[[NONE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
; CHECK: @[[SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null }
; CHECK: @[[PARALLEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null }
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"
; CHECK: @[[NONE_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[PARALLEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
define weak void @none_spmd() {
; CHECK-LABEL: define {{[^@]+}}@none_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: call void @none_spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @none_spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}

define weak void @spmd() {
; CHECK-LABEL: define {{[^@]+}}@spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment)
%i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
call void @spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 2)
ret void
}

define weak void @parallel() {
; CHECK-LABEL: define {{[^@]+}}@parallel() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment)
%i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
call void @spmd_helper()
call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
call void @__kmpc_target_deinit()
call void @__kmpc_target_deinit(ptr null, i8 2)
ret void
}

Expand Down Expand Up @@ -129,8 +133,8 @@ define internal void @parallel_helper() {
declare void @foo()
declare void @bar()
declare zeroext i16 @__kmpc_parallel_level(ptr, i32)
declare i32 @__kmpc_target_init(ptr) #1
declare void @__kmpc_target_deinit() #1
declare i32 @__kmpc_target_init(ptr, i8 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(ptr nocapture readnone, i8 zeroext) #1

!llvm.module.flags = !{!0, !1}
!nvvm.annotations = !{!2, !3, !4}
Expand Down
Loading