diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp index 16e7542a8e826..6090a91b6a3d9 100644 --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -774,8 +774,6 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK3: omp_section_loop.aftersections.fini: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] // CHECK3: omp_section_loop.preheader13: // CHECK3-NEXT: store i32 0, ptr [[P_LOWERBOUND29]], align 4 @@ -811,16 +809,16 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] // CHECK3: omp_section_loop.body.case23.section.after: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body.case25: +// CHECK3: omp_section_loop.body.case26: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 // CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case25.split: +// CHECK3: omp_section_loop.body.case26.split: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after26: +// CHECK3: omp_section_loop.body.case26.section.after27: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after: +// CHECK3: omp_section_loop.body.case26.section.after: // CHECK3-NEXT: br label 
[[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] // CHECK3: omp_section_loop.body16.sections.after: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC17]] @@ -833,8 +831,6 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] // CHECK3: omp_section_loop.after19: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK3: omp_section_loop.after19sections.fini: // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -894,8 +890,8 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] // CHECK3: omp_section_loop.body.case23.cncl: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK3: omp_section_loop.body.case25.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] +// CHECK3: omp_section_loop.body.case26.cncl: +// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE:.*]] // CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -967,8 +963,10 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) // CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 // CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK3: .cncl5: -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +// CHECK3: .cncl4: +// CHECK3-NEXT: br label [[FINI:%.*]] +// CHECK3: .fini +// CHECK3-NEXT: br label %[[EXIT_STUB:omp.par.exit.exitStub]] // CHECK3: .cont: // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[LOADGEP_ARGC_ADDR]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[LOADGEP_ARGV_ADDR]], align 8 @@ -984,16 +982,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3: 
omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK3-NEXT: br label [[FINI]] // CHECK3: 14: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) // CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 // CHECK3-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] // CHECK3: .cncl: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK3-NEXT: br label [[FINI]] // CHECK3: .split: // CHECK3-NEXT: br label [[TMP4]] // CHECK3: omp.par.exit.exitStub: @@ -1089,7 +1085,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK3-NEXT: br label [[FINI:%.*]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1100,7 +1096,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.end: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB19:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label [[CANCEL_CONT]] +// CHECK3-NEXT: br label [[CANCEL_CONT:.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: @@ -1153,6 +1149,8 @@ for (int i = 0; i < argc; ++i) { // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: +// 
CHECK3-NEXT: br label [[DOTFINI:%.*]] +// CHECK3: .fini: // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1162,9 +1160,11 @@ for (int i = 0; i < argc; ++i) { // CHECK3: .omp.sections.case2.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] // CHECK3: .omp.sections.case2.section.after: -// CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE]] +// CHECK3: omp_region.finalize: +// CHECK3-NEXT: br label [[OMP_SECTIONS_EXIT:.*]] // CHECK3: .omp.sections.case2.cncl: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_END]] +// CHECK3-NEXT: br label [[FINI:.*]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: diff --git a/clang/test/OpenMP/critical_codegen.cpp b/clang/test/OpenMP/critical_codegen.cpp index 5c752d354804b..9620613dfdb87 100644 --- a/clang/test/OpenMP/critical_codegen.cpp +++ b/clang/test/OpenMP/critical_codegen.cpp @@ -35,6 +35,8 @@ int main() { // ALL-NEXT: store i8 2, ptr [[A_ADDR]] // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[OMP_REGION_FINALIZE:[^ ,]+]] +// IRBUILDER: [[OMP_REGION_FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_critical(ptr [[DEFAULT_LOC]], i32 [[GTID]], ptr [[UNNAMED_LOCK]]) #pragma omp critical a = 2; diff --git a/clang/test/OpenMP/critical_codegen_attr.cpp b/clang/test/OpenMP/critical_codegen_attr.cpp index 32482a92e76b8..50b0b04fcfd4a 100644 --- a/clang/test/OpenMP/critical_codegen_attr.cpp +++ b/clang/test/OpenMP/critical_codegen_attr.cpp @@ -35,6 +35,8 @@ int main() { // ALL-NEXT: store i8 2, ptr [[A_ADDR]] // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[OMP_REGION_FINALIZE:[^ ,]+]] +// IRBUILDER: [[OMP_REGION_FINALIZE]] // ALL-NEXT: call {{.*}}void 
@__kmpc_end_critical(ptr [[DEFAULT_LOC]], i32 [[GTID]], ptr [[UNNAMED_LOCK]]) [[omp::directive(critical)]] a = 2; diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c index 5cc5640a5173b..56cf9644de5ed 100644 --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -449,7 +449,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]) // CHECK-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]] -// CHECK: omp_loop.preheader187: +// CHECK: omp_loop.preheader190: // CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1 // CHECK-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4 @@ -461,13 +461,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] // CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1 // CHECK-NEXT: br label [[OMP_LOOP_HEADER191:%.*]] -// CHECK: omp_loop.header188: +// CHECK: omp_loop.header191: // CHECK-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ] // CHECK-NEXT: br label [[OMP_LOOP_COND192:%.*]] -// CHECK: omp_loop.cond189: +// CHECK: omp_loop.cond192: // CHECK-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]] // CHECK-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]] -// CHECK: omp_loop.body190: +// CHECK: omp_loop.body193: // CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]] // CHECK-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]) // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 @@ -478,15 +478,15 @@ void parallel_for_2(float *r, 
int a, double b) { // CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8 // CHECK-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC194]] -// CHECK: omp_loop.inc191: +// CHECK: omp_loop.inc194: // CHECK-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1 // CHECK-NEXT: br label [[OMP_LOOP_HEADER191]] -// CHECK: omp_loop.exit192: +// CHECK: omp_loop.exit195: // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]]) // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM208]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER196:%.*]] -// CHECK: omp_loop.after193: +// CHECK: omp_loop.after196: // CHECK-NEXT: ret void // // @@ -576,7 +576,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]) // CHECK-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]] -// CHECK: omp_loop.preheader163: +// CHECK: omp_loop.preheader165: // CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4 // CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1 // CHECK-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4 @@ -588,24 +588,24 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]] // CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1 // CHECK-NEXT: br label [[OMP_LOOP_HEADER166:%.*]] -// CHECK: omp_loop.header164: +// CHECK: omp_loop.header166: // CHECK-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ] // CHECK-NEXT: br label [[OMP_LOOP_COND167:%.*]] -// CHECK: omp_loop.cond165: +// CHECK: omp_loop.cond167: // CHECK-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult 
i32 [[OMP_LOOP_IV172]], [[TMP17]] // CHECK-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]] -// CHECK: omp_loop.exit168: +// CHECK: omp_loop.exit170: // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM182]]) // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM183]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER171:%.*]] -// CHECK: omp_loop.after169: +// CHECK: omp_loop.after171: // CHECK-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK: omp.par.region.parallel.after: // CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK: omp.par.pre_finalize: // CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]] -// CHECK: omp_loop.body166: +// CHECK: omp_loop.body168: // CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]] // CHECK-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]) // CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 @@ -616,7 +616,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8 // CHECK-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC169]] -// CHECK: omp_loop.inc167: +// CHECK: omp_loop.inc169: // CHECK-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1 // CHECK-NEXT: br label [[OMP_LOOP_HEADER166]] // CHECK: omp_loop.body: @@ -758,7 +758,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK: omp_loop.after86: // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL213:%.*]] -// CHECK: omp_parallel210: +// CHECK: omp_parallel213: // CHECK-NEXT: [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 
0, i32 0 // CHECK-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR210]], align 8 // CHECK-NEXT: [[GEP_B_ADDR211:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 1 @@ -777,7 +777,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]) // CHECK-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]] -// CHECK: omp_loop.preheader139: +// CHECK: omp_loop.preheader140: // CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4 // CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1 // CHECK-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4 @@ -789,24 +789,26 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]] // CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1 // CHECK-NEXT: br label [[OMP_LOOP_HEADER141:%.*]] -// CHECK: omp_loop.header140: +// CHECK: omp_loop.header141: // CHECK-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ] // CHECK-NEXT: br label [[OMP_LOOP_COND142:%.*]] -// CHECK: omp_loop.cond141: +// CHECK: omp_loop.cond142: // CHECK-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]] // CHECK-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]] -// CHECK: omp_loop.exit144: +// CHECK: omp_loop.exit145: // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM157]]) // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM158]]) // CHECK-NEXT: br label [[OMP_LOOP_AFTER146:%.*]] -// CHECK: omp_loop.after145: +// CHECK: omp_loop.after146: // CHECK-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]] 
// CHECK: omp.par.region9.parallel.after: // CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE10:%.*]] // CHECK: omp.par.pre_finalize10: -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]] -// CHECK: omp_loop.body142: +// CHECK-NEXT: br label [[FINI159:%.*]] +// CHECK: .fini159: +// CHECK-NEXT: br label [[OMP_PAR_EXIT11_EXITSTUB:%.*]] +// CHECK: omp_loop.body143: // CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]] // CHECK-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]) // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 @@ -817,7 +819,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8 // CHECK-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4 // CHECK-NEXT: br label [[OMP_LOOP_INC144]] -// CHECK: omp_loop.inc143: +// CHECK: omp_loop.inc144: // CHECK-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1 // CHECK-NEXT: br label [[OMP_LOOP_HEADER141]] // CHECK: omp_loop.body83: @@ -1557,6 +1559,8 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: +// CHECK-DEBUG-NEXT: br label [[FINI:.*]] +// CHECK-DEBUG: .fini: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG30]] // CHECK-DEBUG: omp_loop.body: // CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP5]], !dbg [[DBG29]] @@ -1700,6 +1704,8 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: +// CHECK-DEBUG-NEXT: br label [[FINI16:%.*]] +// CHECK-DEBUG: .fini16: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG92]] // CHECK-DEBUG: omp.par.exit.exitStub: // CHECK-DEBUG-NEXT: ret void @@ 
-1769,6 +1775,8 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG: omp.par.region5.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE6:%.*]] // CHECK-DEBUG: omp.par.pre_finalize6: +// CHECK-DEBUG-NEXT: br label [[FINI:%.*]] +// CHECK-DEBUG: .fini: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG103]] // CHECK-DEBUG: omp_loop.body: // CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG102]] @@ -1899,7 +1907,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG148]] // CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.preheader187: +// CHECK-DEBUG: omp_loop.preheader190: // CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] // CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] // CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] @@ -1911,13 +1919,13 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] // CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.header188: +// CHECK-DEBUG: omp_loop.header191: // CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.cond189: +// CHECK-DEBUG: omp_loop.cond192: // CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] // CHECK-DEBUG-NEXT: br 
i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.body190: +// CHECK-DEBUG: omp_loop.body193: // CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]] // CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG148]] // CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] @@ -1928,15 +1936,15 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] // CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.inc191: +// CHECK-DEBUG: omp_loop.inc194: // CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.exit192: +// CHECK-DEBUG: omp_loop.exit195: // CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] // CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]] // CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.after193: +// CHECK-DEBUG: omp_loop.after196: // CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] // // @@ -2031,7 +2039,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG174]] // CHECK-DEBUG-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG174]] // 
CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.preheader163: +// CHECK-DEBUG: omp_loop.preheader165: // CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]] // CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg [[DBG174]] // CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]] @@ -2043,24 +2051,26 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG174]] // CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG174]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.header164: +// CHECK-DEBUG: omp_loop.header166: // CHECK-DEBUG-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG174]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.cond165: +// CHECK-DEBUG: omp_loop.cond167: // CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG174]] // CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.exit168: +// CHECK-DEBUG: omp_loop.exit170: // CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG174]] // CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG176:![0-9]+]] // CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG176]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.after169: +// CHECK-DEBUG: omp_loop.after171: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg 
[[DBG177:![0-9]+]] // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: +// CHECK-DEBUG-NEXT: br label [[FINI184:%.*]] +// CHECK-DEBUG: .fini184: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], !dbg [[DBG177]] -// CHECK-DEBUG: omp_loop.body166: +// CHECK-DEBUG: omp_loop.body168: // CHECK-DEBUG-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], !dbg [[DBG176]] // CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG174]] // CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] @@ -2071,7 +2081,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG180:![0-9]+]] // CHECK-DEBUG-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4, !dbg [[DBG181:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC169]], !dbg [[DBG174]] -// CHECK-DEBUG: omp_loop.inc167: +// CHECK-DEBUG: omp_loop.inc169: // CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1, !dbg [[DBG174]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166]], !dbg [[DBG174]] // CHECK-DEBUG: omp_loop.body: @@ -2218,7 +2228,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG: omp_loop.after86: // CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG208:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL213:%.*]] -// CHECK-DEBUG: omp_parallel210: +// CHECK-DEBUG: omp_parallel213: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 0 // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR210]], align 8 // CHECK-DEBUG-NEXT: [[GEP_B_ADDR211:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 1 @@ -2238,7 
+2248,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG217]] // CHECK-DEBUG-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG217]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.preheader139: +// CHECK-DEBUG: omp_loop.preheader140: // CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]] // CHECK-DEBUG-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg [[DBG217]] // CHECK-DEBUG-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]] @@ -2250,24 +2260,26 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG217]] // CHECK-DEBUG-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG217]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.header140: +// CHECK-DEBUG: omp_loop.header141: // CHECK-DEBUG-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG217]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.cond141: +// CHECK-DEBUG: omp_loop.cond142: // CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG217]] // CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.exit144: +// CHECK-DEBUG: omp_loop.exit145: // CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG217]] // CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG219:![0-9]+]] // CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr 
@[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG219]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.after145: +// CHECK-DEBUG: omp_loop.after146: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG220:![0-9]+]] // CHECK-DEBUG: omp.par.region9.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE10:%.*]] // CHECK-DEBUG: omp.par.pre_finalize10: +// CHECK-DEBUG-NEXT: br label [[FINI159:%.*]] +// CHECK-DEBUG: .fini159: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], !dbg [[DBG220]] -// CHECK-DEBUG: omp_loop.body142: +// CHECK-DEBUG: omp_loop.body143: // CHECK-DEBUG-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], !dbg [[DBG219]] // CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG217]] // CHECK-DEBUG-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG221:![0-9]+]] @@ -2278,7 +2290,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG223:![0-9]+]] // CHECK-DEBUG-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4, !dbg [[DBG224:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC144]], !dbg [[DBG217]] -// CHECK-DEBUG: omp_loop.inc143: +// CHECK-DEBUG: omp_loop.inc144: // CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1, !dbg [[DBG217]] // CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141]], !dbg [[DBG217]] // CHECK-DEBUG: omp_loop.body83: @@ -2375,8 +2387,8 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG: omp_loop.after121: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]], !dbg [[DBG244:![0-9]+]] // CHECK-DEBUG: omp.par.region103.parallel.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE104:%.*]] -// CHECK-DEBUG: omp.par.pre_finalize104: +// CHECK-DEBUG-NEXT: br label 
[[FINI134:%.*]] +// CHECK-DEBUG: .fini134: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]], !dbg [[DBG244]] // CHECK-DEBUG: omp_loop.body118: // CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]], !dbg [[DBG243]] @@ -2460,6 +2472,8 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG: omp.par.region44.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE45:%.*]] // CHECK-DEBUG: omp.par.pre_finalize45: +// CHECK-DEBUG-NEXT: br label [[FINI:%.*]] +// CHECK-DEBUG: .fini: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG260]] // CHECK-DEBUG: omp_loop.body59: // CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]], !dbg [[DBG259]] diff --git a/clang/test/OpenMP/masked_codegen.cpp b/clang/test/OpenMP/masked_codegen.cpp index a39de12d69337..bc6f68de9b248 100644 --- a/clang/test/OpenMP/masked_codegen.cpp +++ b/clang/test/OpenMP/masked_codegen.cpp @@ -35,6 +35,8 @@ int main() { // ALL-NEXT: store i8 2, ptr [[A_ADDR]] // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[OMP_REGION_FINALIZE:[^ ,]+]] +// IRBUILDER: [[OMP_REGION_FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_masked(ptr [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] diff --git a/clang/test/OpenMP/master_codegen.cpp b/clang/test/OpenMP/master_codegen.cpp index a7af326caacfe..5a92444d9a927 100644 --- a/clang/test/OpenMP/master_codegen.cpp +++ b/clang/test/OpenMP/master_codegen.cpp @@ -35,6 +35,8 @@ int main() { // ALL-NEXT: store i8 2, ptr [[A_ADDR]] // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[OMP_REGION_FINALIZE:[^ ,]+]] +// IRBUILDER: [[OMP_REGION_FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_master(ptr [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp 
b/clang/test/OpenMP/nested_loop_codegen.cpp index 9aefc6a739e51..e01fd0da31ee8 100644 --- a/clang/test/OpenMP/nested_loop_codegen.cpp +++ b/clang/test/OpenMP/nested_loop_codegen.cpp @@ -904,6 +904,8 @@ int inline_decl() { // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[FINI:%.*]] +// CHECK4: .fini: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG27]] // CHECK4: for.body: // CHECK4-NEXT: store i32 0, ptr [[LOADGEP_K]], align 4, !dbg [[DBG28:![0-9]+]] @@ -1083,6 +1085,8 @@ int inline_decl() { // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[FINI:%.*]] +// CHECK4: .fini: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG90]] // CHECK4: for.body: // CHECK4-NEXT: #dbg_declare(ptr [[K]], [[META91:![0-9]+]], !DIExpression(), [[META95:![0-9]+]]) diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp index 5cd95f1927e5c..3b29feac7caa2 100644 --- a/clang/test/OpenMP/ordered_codegen.cpp +++ b/clang/test/OpenMP/ordered_codegen.cpp @@ -794,6 +794,8 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +// CHECK1-IRBUILDER: omp_region.finalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -884,6 +886,8 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: store float [[MUL7]], ptr [[ARRAYIDX8]], align 4 // CHECK1-IRBUILDER-NEXT: br label 
[[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +// CHECK1-IRBUILDER: omp_region.finalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1022,6 +1026,8 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: store float [[MUL29]], ptr [[ARRAYIDX31]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +// CHECK1-IRBUILDER: omp_region.finalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1131,6 +1137,8 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: store float [[MUL14]], ptr [[ARRAYIDX16]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +// CHECK1-IRBUILDER: omp_region.finalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1296,17 +1304,19 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK1-IRBUILDER: omp.body.continue38: -// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] -// 
CHECK1-IRBUILDER: omp.inner.for.inc39: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_REGION_FINALIZE38:%.*]] +// CHECK1-IRBUILDER: omp_region.finalize38: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK1-IRBUILDER: omp.body.continue39: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.inc40: // CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4 // CHECK1-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 // CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4 // CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]]) // CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP5:![0-9]+]] -// CHECK1-IRBUILDER: omp.inner.for.end42: +// CHECK1-IRBUILDER: omp.inner.for.end43: // CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1-IRBUILDER: omp.dispatch.inc: // CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] @@ -2034,6 +2044,8 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +// CHECK3-IRBUILDER: omp_region.finalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -2124,6 +2136,8 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: store float [[MUL7]], ptr [[ARRAYIDX8]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: 
br label [[OMP_REGION_FINALIZE:%.*]] +// CHECK3-IRBUILDER: omp_region.finalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -2262,6 +2276,8 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: store float [[MUL29]], ptr [[ARRAYIDX31]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +// CHECK3-IRBUILDER: omp_region.finalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -2371,6 +2387,8 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: store float [[MUL14]], ptr [[ARRAYIDX16]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +// CHECK3-IRBUILDER: omp_region.finalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -2536,17 +2554,19 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK3-IRBUILDER: omp.body.continue38: -// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] -// CHECK3-IRBUILDER: omp.inner.for.inc39: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_REGION_FINALIZE38:%.*]] +// CHECK3-IRBUILDER: 
omp_region.finalize38: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK3-IRBUILDER: omp.body.continue39: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.inc40: // CHECK3-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4 // CHECK3-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 // CHECK3-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4 // CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]]) // CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP5:![0-9]+]] -// CHECK3-IRBUILDER: omp.inner.for.end42: +// CHECK3-IRBUILDER: omp.inner.for.end43: // CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3-IRBUILDER: omp.dispatch.inc: // CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index e8e57aedaa164..9f6004e37db9c 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -906,6 +906,8 @@ int main (int argc, char **argv) { // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[FINI:%.*]] +// CHECK4: .fini: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG35]] // CHECK4: omp.par.exit.exitStub: // CHECK4-NEXT: ret void @@ -975,6 +977,8 @@ int main (int argc, char **argv) { // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[FINI:%.*]] +// CHECK4: .fini: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG66]] // CHECK4: omp.par.exit.exitStub: // CHECK4-NEXT: ret void 
diff --git a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 index cf77c46346b7f..fd59d39b552da 100644 --- a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 +++ b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 @@ -174,10 +174,13 @@ subroutine worst_case(a, b, c, d) ! CHECK-NEXT: br label %omp.par.pre_finalize ! CHECK: omp.par.pre_finalize: ; preds = %reduce.finalize +! CHECK-NEXT: br label %.fini + +! CHECK: .fini: ! CHECK-NEXT: %{{.*}} = load ptr, ptr ! CHECK-NEXT: br label %omp.reduction.cleanup -! CHECK: omp.reduction.cleanup: ; preds = %omp.par.pre_finalize +! CHECK: omp.reduction.cleanup: ; preds = %.fini ! [null check] ! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup43, label %omp.reduction.cleanup44 diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index b801e212ceced..3efbdc4fe17d6 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -576,16 +576,33 @@ class OpenMPIRBuilder { using FinalizeCallbackTy = std::function; struct FinalizationInfo { - /// The finalization callback provided by the last in-flight invocation of - /// createXXXX for the directive of kind DK. - FinalizeCallbackTy FiniCB; - + FinalizationInfo(FinalizeCallbackTy FiniCB, omp::Directive DK, + bool IsCancellable) + : DK(DK), IsCancellable(IsCancellable), FiniCB(std::move(FiniCB)) {} /// The directive kind of the innermost directive that has an associated /// region which might require finalization when it is left. - omp::Directive DK; + const omp::Directive DK; /// Flag to indicate if the directive is cancellable. - bool IsCancellable; + const bool IsCancellable; + + /// The basic block to which control should be transferred to + /// implement the FiniCB. Memoized to avoid generating finalization + /// multiple times. 
+ Expected getFiniBB(IRBuilderBase &Builder); + + /// For cases where there is an unavoidable existing finalization block + /// (e.g. loop finalization after omp sections). The existing finalization + /// block must not contain any non-finalization code. + Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB); + + private: + /// Access via getFiniBB. + BasicBlock *FiniBB = nullptr; + + /// The finalization callback provided by the last in-flight invocation of + /// createXXXX for the directive of kind DK. + FinalizeCallbackTy FiniCB; }; /// Push a finalization callback on the finalization stack. @@ -2246,8 +2263,7 @@ class OpenMPIRBuilder { /// /// \return an error, if any were triggered during execution. LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, - omp::Directive CanceledDirective, - FinalizeCallbackTy ExitCB = {}); + omp::Directive CanceledDirective); /// Generate a target region entry call. /// @@ -3402,7 +3418,8 @@ class OpenMPIRBuilder { /// Common interface to finalize the region /// /// \param OMPD Directive to generate exiting code for - /// \param FinIP Insertion point for emitting Finalization code and exit call + /// \param FinIP Insertion point for emitting Finalization code and exit call. + /// This block must not contain any non-finalization code. 
/// \param ExitCall Call to the ending OMP Runtime Function /// \param HasFinalize indicate if the directive will require finalization /// and has a finalization callback in the stack that diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index cf88c4309974f..0d196be2ee696 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -682,6 +682,47 @@ OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) { return {FnTy, Fn}; } +Expected +OpenMPIRBuilder::FinalizationInfo::getFiniBB(IRBuilderBase &Builder) { + if (!FiniBB) { + Function *ParentFunc = Builder.GetInsertBlock()->getParent(); + IRBuilderBase::InsertPointGuard Guard(Builder); + FiniBB = BasicBlock::Create(Builder.getContext(), ".fini", ParentFunc); + Builder.SetInsertPoint(FiniBB); + // FiniCB adds the branch to the exit stub. + if (Error Err = FiniCB(Builder.saveIP())) + return Err; + } + return FiniBB; +} + +Error OpenMPIRBuilder::FinalizationInfo::mergeFiniBB(IRBuilderBase &Builder, + BasicBlock *OtherFiniBB) { + // Simple case: FiniBB does not exist yet: re-use OtherFiniBB. + if (!FiniBB) { + FiniBB = OtherFiniBB; + + Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt()); + if (Error Err = FiniCB(Builder.saveIP())) + return Err; + + return Error::success(); + } + + // Move instructions from FiniBB to the start of OtherFiniBB. 
+ auto EndIt = FiniBB->end(); + if (FiniBB->size() >= 1) + if (auto Prev = std::prev(EndIt); Prev->isTerminator()) + EndIt = Prev; + OtherFiniBB->splice(OtherFiniBB->getFirstNonPHIIt(), FiniBB, FiniBB->begin(), + EndIt); + + FiniBB->replaceAllUsesWith(OtherFiniBB); + FiniBB->eraseFromParent(); + FiniBB = OtherFiniBB; + return Error::success(); +} + Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID); auto *Fn = dyn_cast(RTLFn.getCallee()); @@ -1129,21 +1170,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = createRuntimeFunctionCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); - auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) -> Error { - if (CanceledDirective == OMPD_parallel) { - IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - return createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, - /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false) - .takeError(); - } - return Error::success(); - }; // The actual cancel logic is shared with others, e.g., cancel_barriers. - if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB)) + if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective)) return Err; // Update the insertion point and remove the terminator we introduced. 
@@ -1180,21 +1209,9 @@ OpenMPIRBuilder::createCancellationPoint(const LocationDescription &Loc, Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = createRuntimeFunctionCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args); - auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) -> Error { - if (CanceledDirective == OMPD_parallel) { - IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - return createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, - /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false) - .takeError(); - } - return Error::success(); - }; // The actual cancel logic is shared with others, e.g., cancel_barriers. - if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB)) + if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective)) return Err; // Update the insertion point and remove the terminator we introduced. @@ -1298,8 +1315,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch( } Error OpenMPIRBuilder::emitCancelationCheckImpl( - Value *CancelFlag, omp::Directive CanceledDirective, - FinalizeCallbackTy ExitCB) { + Value *CancelFlag, omp::Directive CanceledDirective) { assert(isLastFinalizationInfoCancellable(CanceledDirective) && "Unexpected cancellation!"); @@ -1326,13 +1342,12 @@ Error OpenMPIRBuilder::emitCancelationCheckImpl( // From the cancellation block we finalize all variables and go to the // post finalization block that is known to the FiniCB callback. 
- Builder.SetInsertPoint(CancellationBlock); - if (ExitCB) - if (Error Err = ExitCB(Builder.saveIP())) - return Err; auto &FI = FinalizationStack.back(); - if (Error Err = FI.FiniCB(Builder.saveIP())) - return Err; + Expected FiniBBOrErr = FI.getFiniBB(Builder); + if (!FiniBBOrErr) + return FiniBBOrErr.takeError(); + Builder.SetInsertPoint(CancellationBlock); + Builder.CreateBr(*FiniBBOrErr); // The continuation block is where code generation continues. Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); @@ -1821,8 +1836,18 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); - if (Error Err = FiniCB(PreFiniIP)) - return Err; + Expected FiniBBOrErr = FiniInfo.getFiniBB(Builder); + if (!FiniBBOrErr) + return FiniBBOrErr.takeError(); + { + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.restoreIP(PreFiniIP); + Builder.CreateBr(*FiniBBOrErr); + // There's currently a branch to omp.par.exit. Delete it. We will get there + // via the fini block + if (Instruction *Term = Builder.GetInsertBlock()->getTerminator()) + Term->eraseFromParent(); + } // Register the outlined info. addOutlineInfo(std::move(OI)); @@ -2258,23 +2283,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( if (!updateToLocation(Loc)) return Loc.IP; - // FiniCBWrapper needs to create a branch to the loop finalization block, but - // this has not been created yet at some times when this callback runs. - SmallVector CancellationBranches; - auto FiniCBWrapper = [&](InsertPointTy IP) { - if (IP.getBlock()->end() != IP.getPoint()) - return FiniCB(IP); - // This must be done otherwise any nested constructs using FinalizeOMPRegion - // will fail because that function requires the Finalization Basic Block to - // have a terminator, which is already removed by EmitOMPRegionBody. 
- // IP is currently at cancelation block. - BranchInst *DummyBranch = Builder.CreateBr(IP.getBlock()); - IP = InsertPointTy(DummyBranch->getParent(), DummyBranch->getIterator()); - CancellationBranches.push_back(DummyBranch); - return FiniCB(IP); - }; - - FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); + FinalizationStack.push_back({FiniCB, OMPD_sections, IsCancellable}); // Each section is emitted as a switch case // Each finalization callback is handled from clang.EmitOMPSectionDirective() @@ -2340,20 +2349,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( auto FiniInfo = FinalizationStack.pop_back_val(); assert(FiniInfo.DK == OMPD_sections && "Unexpected finalization stack state!"); - if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) { - Builder.restoreIP(AfterIP); - BasicBlock *FiniBB = - splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini"); - if (Error Err = CB(Builder.saveIP())) - return Err; - AfterIP = {FiniBB, FiniBB->begin()}; - } - - // Now we can fix the dummy branch to point to the right place - for (BranchInst *DummyBranch : CancellationBranches) { - assert(DummyBranch->getNumSuccessors() == 1); - DummyBranch->setSuccessor(0, LoopFini); - } + if (Error Err = FiniInfo.mergeFiniBB(Builder, LoopFini)) + return Err; return AfterIP; } @@ -6718,9 +6715,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion( emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); if (!AfterIP) return AfterIP.takeError(); - assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && - "Unexpected Control Flow State!"); - MergeBlockIntoPredecessor(FiniBB); // If we are skipping the region of a non conditional, remove the exit // block, and clear the builder's insertion point. 
@@ -6780,14 +6774,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit( FinalizationInfo Fi = FinalizationStack.pop_back_val(); assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); - if (Error Err = Fi.FiniCB(FinIP)) - return Err; - - BasicBlock *FiniBB = FinIP.getBlock(); - Instruction *FiniBBTI = FiniBB->getTerminator(); + if (Error Err = Fi.mergeFiniBB(Builder, FinIP.getBlock())) + return std::move(Err); - // set Builder IP for call creation - Builder.SetInsertPoint(FiniBBTI); + // Exit condition: insertion point is before the terminator of the new Fini + // block + Builder.SetInsertPoint(FinIP.getBlock()->getTerminator()); } if (!ExitCall) diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll index 83452e72b56b9..1bbac5cc3154b 100644 --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -4880,6 +4880,8 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: +; CHECK2-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +; CHECK2: omp_region.finalize: ; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.exit.exitStub: @@ -4974,6 +4976,8 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: +; CHECK2-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +; CHECK2: omp_region.finalize: ; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.exit.exitStub: @@ -5070,6 +5074,8 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: +; CHECK2-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +; 
CHECK2: omp_region.finalize: ; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.exit.exitStub: @@ -5157,6 +5163,8 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: +; CHECK2-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +; CHECK2: omp_region.finalize: ; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.exit.exitStub: @@ -5254,6 +5262,8 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: +; CHECK2-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +; CHECK2: omp_region.finalize: ; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.exit.exitStub: @@ -5434,6 +5444,8 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: +; CHECK2-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +; CHECK2: omp_region.finalize: ; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.exit.exitStub: @@ -5624,8 +5636,10 @@ entry: ; CHECK2: omp.par.region.split: ; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK2: omp.par.pre_finalize: -; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -; CHECK2: omp_region.body5: +; CHECK2-NEXT: br label [[FINI:%.*]] +; CHECK2: .fini: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_EXITSTUB:.*]] +; CHECK2: omp_region.body6: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] ; CHECK2: seq.par.merged2: ; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4 @@ -5634,7 +5648,9 @@ entry: ; CHECK2-NEXT: br 
label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] -; CHECK2: omp_region.body5.split: +; CHECK2: omp_region.body6.split: +; CHECK2-NEXT: br label [[OMP_REGION_FINALIZE5:%.*]] +; CHECK2: omp_region.finalize{{.*}}: ; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: br label [[OMP_REGION_END4]] ; CHECK2: omp_region.body: @@ -5646,6 +5662,8 @@ entry: ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: +; CHECK2-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] +; CHECK2: omp_region.finalize: ; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.exit.exitStub: diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 1f35b7a5cfaa4..dab0a46eeb3bc 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -428,8 +428,8 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel)); Builder.restoreIP(NewIP); EXPECT_FALSE(M->global_empty()); - EXPECT_EQ(M->size(), 4U); - EXPECT_EQ(F->size(), 4U); + EXPECT_EQ(M->size(), 3U); + EXPECT_EQ(F->size(), 5U); EXPECT_EQ(BB->size(), 4U); CallInst *GTID = dyn_cast(&BB->front()); @@ -449,23 +449,16 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock()); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); - CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); - EXPECT_NE(GTID1, nullptr); - EXPECT_EQ(GTID1->arg_size(), 1U); - EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - 
EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); - CallInst *Barrier = dyn_cast(GTID1->getNextNode()); - EXPECT_NE(Barrier, nullptr); - EXPECT_EQ(Barrier->arg_size(), 2U); - EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); - EXPECT_TRUE(Barrier->use_empty()); + EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 1U); EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); + // cancel branch instruction (1) -> .cncl -> .fini -> CBB + EXPECT_EQ(CancelBBTI->getSuccessor(1) + ->getTerminator() + ->getSuccessor(0) + ->getTerminator() + ->getSuccessor(0), + CBB); EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); @@ -497,8 +490,8 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel)); Builder.restoreIP(NewIP); EXPECT_FALSE(M->global_empty()); - EXPECT_EQ(M->size(), 4U); - EXPECT_EQ(F->size(), 7U); + EXPECT_EQ(M->size(), 3U); + EXPECT_EQ(F->size(), 8U); EXPECT_EQ(BB->size(), 1U); ASSERT_TRUE(isa(BB->getTerminator())); ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U); @@ -524,23 +517,15 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), NewIP.getBlock()); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); - CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); - EXPECT_NE(GTID1, nullptr); - EXPECT_EQ(GTID1->arg_size(), 1U); - EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); - 
CallInst *Barrier = dyn_cast(GTID1->getNextNode()); - EXPECT_NE(Barrier, nullptr); - EXPECT_EQ(Barrier->arg_size(), 2U); - EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); - EXPECT_TRUE(Barrier->use_empty()); + EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 1U); EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); + EXPECT_EQ(CancelBBTI->getSuccessor(1) + ->getTerminator() + ->getSuccessor(0) + ->getTerminator() + ->getSuccessor(0), + CBB); EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); @@ -572,7 +557,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { Builder.restoreIP(NewIP); EXPECT_FALSE(M->global_empty()); EXPECT_EQ(M->size(), 3U); - EXPECT_EQ(F->size(), 4U); + EXPECT_EQ(F->size(), 5U); EXPECT_EQ(BB->size(), 4U); CallInst *GTID = dyn_cast(&BB->front()); @@ -595,7 +580,11 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U); EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); - EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), + EXPECT_EQ(BarrierBBTI->getSuccessor(1) + ->getTerminator() + ->getSuccessor(0) + ->getTerminator() + ->getSuccessor(0), CBB); EXPECT_EQ(cast(Barrier)->getArgOperand(1), GTID); @@ -1291,8 +1280,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 0U); - EXPECT_EQ(NumFinalizationPoints, 2U); - EXPECT_TRUE(FakeDestructor->hasNUses(2)); + EXPECT_EQ(NumFinalizationPoints, 1U); + EXPECT_TRUE(FakeDestructor->hasNUses(1)); Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); @@ -2916,7 +2905,8 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { BranchInst *EntryBr = cast(EntryBB->getTerminator()); 
EXPECT_TRUE(EntryBr->isConditional()); EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); - BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); + BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor(); + BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor(); EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); CmpInst *CondInst = cast(EntryBr->getCondition()); @@ -2928,7 +2918,7 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { EXPECT_TRUE(isa(MasterEntryCI->getArgOperand(0))); CallInst *MasterEndCI = nullptr; - for (auto &FI : *ThenBB) { + for (auto &FI : *FinalizeBB) { Instruction *cur = &FI; if (isa(cur)) { MasterEndCI = cast(cur); @@ -2998,7 +2988,8 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) { BranchInst *EntryBr = cast(EntryBB->getTerminator()); EXPECT_TRUE(EntryBr->isConditional()); EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); - BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); + BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor(); + BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor(); EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); CmpInst *CondInst = cast(EntryBr->getCondition()); @@ -3010,7 +3001,7 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) { EXPECT_TRUE(isa(MaskedEntryCI->getArgOperand(0))); CallInst *MaskedEndCI = nullptr; - for (auto &FI : *ThenBB) { + for (auto &FI : *FinalizeBB) { Instruction *cur = &FI; if (isa(cur)) { MaskedEndCI = cast(cur); @@ -3062,6 +3053,9 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { FINICB_WRAPPER(FiniCB), "testCRT", nullptr)); Builder.restoreIP(AfterIP); + BasicBlock *FinalizeBB = EntryBB->getUniqueSuccessor(); + EXPECT_NE(FinalizeBB, nullptr); + CallInst *CriticalEntryCI = nullptr; for (auto &EI : *EntryBB) { Instruction *cur = &EI; @@ -3078,7 +3072,7 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { EXPECT_TRUE(isa(CriticalEntryCI->getArgOperand(0))); CallInst *CriticalEndCI = nullptr; - for (auto &FI : *EntryBB) { + for (auto &FI : *FinalizeBB) { Instruction *cur = &FI; if (isa(cur)) { CriticalEndCI = cast(cur); @@ 
-3312,6 +3306,9 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { FINICB_WRAPPER(FiniCB), true)); Builder.restoreIP(AfterIP); + BasicBlock *FinalizeBB = EntryBB->getUniqueSuccessor(); + EXPECT_NE(FinalizeBB, nullptr); + Builder.CreateRetVoid(); OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -3334,7 +3331,7 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { EXPECT_TRUE(isa(OrderedEntryCI->getArgOperand(0))); CallInst *OrderedEndCI = nullptr; - for (auto &FI : *EntryBB) { + for (auto &FI : *FinalizeBB) { Instruction *Cur = &FI; if (isa(Cur)) { OrderedEndCI = cast(Cur); @@ -3508,7 +3505,8 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { BranchInst *EntryBr = cast(EntryBB->getTerminator()); EXPECT_TRUE(EntryBr->isConditional()); EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); - BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); + BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor(); + BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor(); EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); CmpInst *CondInst = cast(EntryBr->getCondition()); @@ -3520,7 +3518,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { EXPECT_TRUE(isa(SingleEntryCI->getArgOperand(0))); CallInst *SingleEndCI = nullptr; - for (auto &FI : *ThenBB) { + for (auto &FI : *FinalizeBB) { Instruction *cur = &FI; if (isa(cur)) { SingleEndCI = cast(cur); @@ -3601,7 +3599,8 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { BranchInst *EntryBr = cast(EntryBB->getTerminator()); EXPECT_TRUE(EntryBr->isConditional()); EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); - BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); + BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor(); + BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor(); EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); CmpInst *CondInst = cast(EntryBr->getCondition()); @@ -3613,7 +3612,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { EXPECT_TRUE(isa(SingleEntryCI->getArgOperand(0))); CallInst *SingleEndCI = nullptr; - for 
(auto &FI : *ThenBB) { + for (auto &FI : *FinalizeBB) { Instruction *cur = &FI; if (isa(cur)) { SingleEndCI = cast(cur); @@ -3724,7 +3723,8 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { BranchInst *EntryBr = cast(EntryBB->getTerminator()); EXPECT_TRUE(EntryBr->isConditional()); EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); - BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); + BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor(); + BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor(); EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); CmpInst *CondInst = cast(EntryBr->getCondition()); @@ -3743,25 +3743,28 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI); // icmp EXPECT_TRUE(ThenBBI.next()); + + // check FinalizeBB + BBInstIter FinalizeBBI(FinalizeBB); // store 1, DidIt - auto *DidItSI = ThenBBI.next(); + auto *DidItSI = FinalizeBBI.next(); EXPECT_NE(DidItSI, nullptr); EXPECT_EQ(DidItSI->getValueOperand(), ConstantInt::get(Type::getInt32Ty(Ctx), 1)); Value *DidIt = DidItSI->getPointerOperand(); // call __kmpc_end_single - auto *SingleEndCI = ThenBBI.next(); + auto *SingleEndCI = FinalizeBBI.next(); EXPECT_NE(SingleEndCI, nullptr); EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), "__kmpc_end_single"); EXPECT_EQ(SingleEndCI->arg_size(), 2U); EXPECT_TRUE(isa(SingleEndCI->getArgOperand(0))); EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); // br ExitBB - auto *ExitBBBI = ThenBBI.next(); + auto *ExitBBBI = FinalizeBBI.next(); EXPECT_NE(ExitBBBI, nullptr); EXPECT_TRUE(ExitBBBI->isUnconditional()); EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB); - EXPECT_FALSE(ThenBBI.hasNext()); + EXPECT_FALSE(FinalizeBBI.hasNext()); // check ExitBB BBInstIter ExitBBI(ExitBB); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 0d5b553c8e652..cdab9f87a8758 100644 --- 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2729,6 +2729,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, ArrayRef isByRef = getIsByRef(opInst.getReductionByref()); assert(isByRef.size() == opInst.getNumReductionVars()); llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + bool isCancellable = constructIsCancellable(opInst); if (failed(checkImplementationStatus(*opInst))) return failure(); @@ -2867,6 +2868,18 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, privateVarsInfo.privatizers))) return llvm::make_error(); + // If we could be performing cancellation, add the cancellation barrier on + // the way out of the outlined region. + if (isCancellable) { + auto IPOrErr = ompBuilder->createBarrier( + llvm::OpenMPIRBuilder::LocationDescription(builder), + llvm::omp::Directive::OMPD_unknown, + /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + if (!IPOrErr) + return IPOrErr.takeError(); + } + builder.restoreIP(oldIP); return llvm::Error::success(); }; @@ -2880,7 +2893,6 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, auto pbKind = llvm::omp::OMP_PROC_BIND_default; if (auto bind = opInst.getProcBindKind()) pbKind = getProcBindKind(*bind); - bool isCancellable = constructIsCancellable(opInst); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); diff --git a/mlir/test/Target/LLVMIR/openmp-barrier-cancel.mlir b/mlir/test/Target/LLVMIR/openmp-barrier-cancel.mlir index c4b245667a1f3..6585549de7f96 100644 --- a/mlir/test/Target/LLVMIR/openmp-barrier-cancel.mlir +++ b/mlir/test/Target/LLVMIR/openmp-barrier-cancel.mlir @@ -29,22 +29,24 @@ llvm.func @test() { // CHECK: %[[VAL_14:.*]] = icmp eq i32 %[[VAL_13]], 0 // CHECK: br i1 %[[VAL_14]], label %[[VAL_15:.*]], label %[[VAL_16:.*]] // CHECK: omp.par.region1.cncl: 
; preds = %[[VAL_11]] -// CHECK: %[[VAL_17:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK: %[[VAL_18:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 %[[VAL_17]]) -// CHECK: br label %[[VAL_19:.*]] +// CHECK: br label %[[FINI:.*]] +// CHECK: .fini: +// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: %[[CNCL_BARRIER:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 %[[TID]]) +// CHECK: br label %[[EXIT_STUB:.*]] // CHECK: omp.par.region1.split: ; preds = %[[VAL_11]] // CHECK: %[[VAL_20:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: %[[VAL_21:.*]] = call i32 @__kmpc_cancel_barrier(ptr @3, i32 %[[VAL_20]]) // CHECK: %[[VAL_22:.*]] = icmp eq i32 %[[VAL_21]], 0 // CHECK: br i1 %[[VAL_22]], label %[[VAL_23:.*]], label %[[VAL_24:.*]] // CHECK: omp.par.region1.split.cncl: ; preds = %[[VAL_15]] -// CHECK: br label %[[VAL_19]] +// CHECK: br label %[[FINI]] // CHECK: omp.par.region1.split.cont: ; preds = %[[VAL_15]] // CHECK: br label %[[VAL_25:.*]] // CHECK: omp.region.cont: ; preds = %[[VAL_23]] // CHECK: br label %[[VAL_26:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_25]] -// CHECK: br label %[[VAL_19]] -// CHECK: omp.par.exit.exitStub: ; preds = %[[VAL_26]], %[[VAL_24]], %[[VAL_16]] +// CHECK: br label %[[FINI]] +// CHECK: omp.par.exit.exitStub: // CHECK: ret void diff --git a/mlir/test/Target/LLVMIR/openmp-cancel.mlir b/mlir/test/Target/LLVMIR/openmp-cancel.mlir index 21241702ad569..5e20b8793f499 100644 --- a/mlir/test/Target/LLVMIR/openmp-cancel.mlir +++ b/mlir/test/Target/LLVMIR/openmp-cancel.mlir @@ -24,16 +24,18 @@ llvm.func @cancel_parallel() { // CHECK: %[[VAL_15:.*]] = icmp eq i32 %[[VAL_14]], 0 // CHECK: br i1 %[[VAL_15]], label %[[VAL_16:.*]], label %[[VAL_17:.*]] // CHECK: omp.par.region1.cncl: ; preds = %[[VAL_12]] +// CHECK: br label %[[VAL_20:.*]] +// CHECK: .fini: // CHECK: %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: %[[VAL_19:.*]] = call i32 
@__kmpc_cancel_barrier(ptr @2, i32 %[[VAL_18]]) -// CHECK: br label %[[VAL_20:.*]] +// CHECK: br label %[[EXIT_STUB:.*]] // CHECK: omp.par.region1.split: ; preds = %[[VAL_12]] // CHECK: br label %[[VAL_21:.*]] // CHECK: omp.region.cont: ; preds = %[[VAL_16]] // CHECK: br label %[[VAL_22:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_21]] // CHECK: br label %[[VAL_20]] -// CHECK: omp.par.exit.exitStub: ; preds = %[[VAL_22]], %[[VAL_17]] +// CHECK: omp.par.exit.exitStub: // CHECK: ret void llvm.func @cancel_parallel_if(%arg0 : i1) { @@ -67,18 +69,20 @@ llvm.func @cancel_parallel_if(%arg0 : i1) { // CHECK: br label %[[VAL_26:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_25]] // CHECK: br label %[[VAL_27:.*]] -// CHECK: 5: ; preds = %[[VAL_20]] +// CHECK: .fini: +// CHECK: %[[VAL_32:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: %[[VAL_33:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 %[[VAL_32]]) +// CHECK: br label %[[EXIT_STUB:.*]] +// CHECK: 6: ; preds = %[[VAL_20]] // CHECK: %[[VAL_28:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: %[[VAL_29:.*]] = call i32 @__kmpc_cancel(ptr @1, i32 %[[VAL_28]], i32 1) // CHECK: %[[VAL_30:.*]] = icmp eq i32 %[[VAL_29]], 0 // CHECK: br i1 %[[VAL_30]], label %[[VAL_24]], label %[[VAL_31:.*]] // CHECK: .cncl: ; preds = %[[VAL_21]] -// CHECK: %[[VAL_32:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK: %[[VAL_33:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 %[[VAL_32]]) // CHECK: br label %[[VAL_27]] // CHECK: .split: ; preds = %[[VAL_21]] // CHECK: br label %[[VAL_23]] -// CHECK: omp.par.exit.exitStub: ; preds = %[[VAL_31]], %[[VAL_26]] +// CHECK: omp.par.exit.exitStub: // CHECK: ret void llvm.func @cancel_sections_if(%cond : i1) { @@ -145,14 +149,12 @@ llvm.func @cancel_sections_if(%cond : i1) { // CHECK: omp_section_loop.inc: ; preds = %[[VAL_23]] // CHECK: %[[VAL_15]] = add nuw i32 %[[VAL_14]], 1 // CHECK: br label %[[VAL_12]] -// CHECK: omp_section_loop.exit: ; preds = 
%[[VAL_33]], %[[VAL_16]] +// CHECK: omp_section_loop.exit: // CHECK: call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_7]]) // CHECK: %[[VAL_36:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_36]]) // CHECK: br label %[[VAL_37:.*]] // CHECK: omp_section_loop.after: ; preds = %[[VAL_19]] -// CHECK: br label %[[VAL_38:.*]] -// CHECK: omp_section_loop.aftersections.fini: ; preds = %[[VAL_37]] // CHECK: ret void // CHECK: .cncl: ; preds = %[[VAL_27]] // CHECK: br label %[[VAL_19]] @@ -232,7 +234,7 @@ llvm.func @cancel_wsloop_if(%lb : i32, %ub : i32, %step : i32, %cond : i1) { // CHECK: omp_loop.inc: ; preds = %[[VAL_52]] // CHECK: %[[VAL_34]] = add nuw i32 %[[VAL_33]], 1 // CHECK: br label %[[VAL_31]] -// CHECK: omp_loop.exit: ; preds = %[[VAL_50]], %[[VAL_35]] +// CHECK: omp_loop.exit: // CHECK: call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_26]]) // CHECK: %[[VAL_53:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_53]]) diff --git a/mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir b/mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir index 5e0d3f9f7e293..93fa2064ab99a 100644 --- a/mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir +++ b/mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir @@ -24,16 +24,18 @@ llvm.func @cancellation_point_parallel() { // CHECK: %[[VAL_15:.*]] = icmp eq i32 %[[VAL_14]], 0 // CHECK: br i1 %[[VAL_15]], label %[[VAL_16:.*]], label %[[VAL_17:.*]] // CHECK: omp.par.region1.cncl: ; preds = %[[VAL_12]] +// CHECK: br label %[[FINI:.*]] +// CHECK: .fini: // CHECK: %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: %[[VAL_19:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 %[[VAL_18]]) -// CHECK: br label %[[VAL_20:.*]] +// CHECK: br label %[[EXIT_STUB:.*]] // CHECK: omp.par.region1.split: ; preds = %[[VAL_12]] // CHECK: br label %[[VAL_21:.*]] // CHECK: omp.region.cont: ; preds = 
%[[VAL_16]] // CHECK: br label %[[VAL_22:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_21]] -// CHECK: br label %[[VAL_20]] -// CHECK: omp.par.exit.exitStub: ; preds = %[[VAL_22]], %[[VAL_17]] +// CHECK: br label %[[FINI]] +// CHECK: omp.par.exit.exitStub: // CHECK: ret void llvm.func @cancellation_point_sections() { @@ -94,14 +96,12 @@ llvm.func @cancellation_point_sections() { // CHECK: omp_section_loop.inc: ; preds = %[[VAL_46]] // CHECK: %[[VAL_38]] = add nuw i32 %[[VAL_37]], 1 // CHECK: br label %[[VAL_35]] -// CHECK: omp_section_loop.exit: ; preds = %[[VAL_53]], %[[VAL_39]] +// CHECK: omp_section_loop.exit: // CHECK: call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_30]]) // CHECK: %[[VAL_55:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_55]]) // CHECK: br label %[[VAL_56:.*]] // CHECK: omp_section_loop.after: ; preds = %[[VAL_42]] -// CHECK: br label %[[VAL_57:.*]] -// CHECK: omp_section_loop.aftersections.fini: ; preds = %[[VAL_56]] // CHECK: ret void // CHECK: omp.section.region.cncl: ; preds = %[[VAL_48]] // CHECK: br label %[[VAL_42]] @@ -175,7 +175,7 @@ llvm.func @cancellation_point_wsloop(%lb : i32, %ub : i32, %step : i32) { // CHECK: omp_loop.inc: ; preds = %[[VAL_106]] // CHECK: %[[VAL_92]] = add nuw i32 %[[VAL_91]], 1 // CHECK: br label %[[VAL_89]] -// CHECK: omp_loop.exit: ; preds = %[[VAL_105]], %[[VAL_93]] +// CHECK: omp_loop.exit: // CHECK: call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_84]]) // CHECK: %[[VAL_107:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_107]]) diff --git a/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir b/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir index faccfc678adfe..99f37c7e79be8 100644 --- a/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir +++ b/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir @@ -21,9 +21,11 @@ llvm.func @parallel_infinite_loop() 
-> () { // CHECK: omp.region.cont: ; No predecessors! // CHECK: br label %[[VAL_4:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_5:.*]] -// CHECK: br label %[[VAL_6:.*]] -// CHECK: omp.par.exit: ; preds = %[[VAL_4]] +// CHECK: br label %[[FINI:.*]] +// CHECK: [[OMP_PAR_EXIT:omp.par.exit]]: ; preds = %[[FINI]] // CHECK: ret void +// CHECK: [[FINI]]: +// CHECK: br label %[[OMP_PAR_EXIT]] // CHECK: } // CHECK-LABEL: define internal void @parallel_infinite_loop..omp_par( diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir index 887d2977e45cc..c79c369b69d7f 100644 --- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir +++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir @@ -108,6 +108,8 @@ llvm.func @missordered_blocks_(%arg0: !llvm.ptr {fir.bindc_name = "x"}, %arg1: ! // CHECK: reduce.finalize: ; preds = %[[VAL_49]], %[[VAL_43]] // CHECK: br label %[[VAL_53:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_48]] +// CHECK: br label %[[FINI:.*]] +// CHECK: .fini: // CHECK: %[[VAL_54:.*]] = load ptr, ptr %[[VAL_20]], align 8 // CHECK: %[[VAL_55:.*]] = load ptr, ptr %[[VAL_21]], align 8 // CHECK: br label %[[VAL_56:.*]] @@ -115,5 +117,5 @@ llvm.func @missordered_blocks_(%arg0: !llvm.ptr {fir.bindc_name = "x"}, %arg1: ! 
// CHECK: br label %[[VAL_38]] // CHECK: omp.reduction.neutral1: ; preds = %[[VAL_25]] // CHECK: br label %[[VAL_30]] -// CHECK: omp.par.exit.exitStub: ; preds = %[[VAL_53]] +// CHECK: omp.par.exit.exitStub: ; preds = %[[FINI]] // CHECK: ret void diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir index b302b4b20edd5..13f52f054869e 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir @@ -127,8 +127,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_36]]) // CHECK: br label %[[VAL_37:.*]] // CHECK: omp_section_loop.after: ; preds = %[[VAL_35]] -// CHECK: br label %[[VAL_38:.*]] -// CHECK: omp_section_loop.aftersections.fini: ; preds = %[[VAL_37]] // CHECK: %[[VAL_39:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_14]], i64 0, i64 0 // CHECK: store ptr %[[VAL_21]], ptr %[[VAL_39]], align 8 // CHECK: %[[VAL_40:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) @@ -137,9 +135,9 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: i32 1, label %[[VAL_43:.*]] // CHECK: i32 2, label %[[VAL_44:.*]] // CHECK: ] -// CHECK: reduce.switch.atomic: ; preds = %[[VAL_38]] +// CHECK: reduce.switch.atomic: ; preds = %[[VAL_37]] // CHECK: unreachable -// CHECK: reduce.switch.nonatomic: ; preds = %[[VAL_38]] +// CHECK: reduce.switch.nonatomic: ; preds = %[[VAL_37]] // CHECK: %[[VAL_45:.*]] = load ptr, ptr %[[VAL_21]], align 8 // CHECK: br label %[[VAL_46:.*]] // CHECK: omp.reduction.nonatomic.body: ; preds = %[[VAL_43]] @@ -157,7 +155,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: omp.reduction.nonatomic.body17: ; preds = %[[VAL_47]] // CHECK: %[[VAL_50]] = sub i64 %[[VAL_49]], 1 // CHECK: br label %[[VAL_47]] -// CHECK: 
reduce.finalize: ; preds = %[[VAL_53]], %[[VAL_38]] +// CHECK: reduce.finalize: ; preds = %[[VAL_53]], %[[VAL_37]] // CHECK: %[[VAL_55:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_55]]) // CHECK: %[[VAL_56:.*]] = load ptr, ptr %[[VAL_21]], align 8 @@ -173,7 +171,9 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: omp.region.cont: ; preds = %[[VAL_62]] // CHECK: br label %[[VAL_64:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_63]] -// CHECK: br label %[[VAL_65:.*]] +// CHECK: br label %[[FINI:.fini.*]] +// CHECK: [[FINI]]: +// CHECK: br label %[[EXIT:.*]] // CHECK: omp.reduction.cleanup21: ; preds = %[[VAL_57]] // CHECK: br label %[[VAL_61]] // CHECK: omp_section_loop.body: ; preds = %[[VAL_32]] @@ -219,5 +219,5 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: omp_section_loop.inc: ; preds = %[[VAL_69]] // CHECK: %[[VAL_31]] = add nuw i32 %[[VAL_30]], 1 // CHECK: br label %[[VAL_28]] -// CHECK: omp.par.exit.exitStub: ; preds = %[[VAL_64]] +// CHECK: omp.par.exit.exitStub: ; preds = %[[FINI]] // CHECK: ret void diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir index a714ca68a1e95..cb30d3b2f4473 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir @@ -96,8 +96,10 @@ module { // CHECK: reduce.finalize: ; preds = %[[VAL_34]], %[[VAL_28]] // CHECK: br label %[[VAL_38:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_33]] +// CHECK: br label %[[FINI:.*]] +// CHECK: [[FINI]]: // CHECK: br label %[[VAL_39:.*]] -// CHECK: omp.par.exit.exitStub: ; preds = %[[VAL_38]] +// CHECK: omp.par.exit.exitStub: ; preds = %[[FINI]] // CHECK: ret void // CHECK: %[[VAL_40:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_41:.*]], i64 0, i64 0 // CHECK: %[[VAL_42:.*]] = load ptr, 
ptr %[[VAL_40]], align 8 diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir index 19da6f8517fcd..00f6c1b02206e 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir @@ -86,8 +86,6 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.in // CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_40]]) // CHECK: br label %[[VAL_41:.*]] // CHECK: omp_section_loop.after: ; preds = %[[VAL_39]] -// CHECK: br label %[[VAL_42:.*]] -// CHECK: omp_section_loop.aftersections.fini: ; preds = %[[VAL_41]] // CHECK: %[[VAL_43:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_21]], i64 0, i64 0 // CHECK: store ptr %[[VAL_20]], ptr %[[VAL_43]], align 8 // CHECK: %[[VAL_44:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) @@ -96,23 +94,25 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.in // CHECK: i32 1, label %[[VAL_47:.*]] // CHECK: i32 2, label %[[VAL_48:.*]] // CHECK: ] -// CHECK: reduce.switch.atomic: ; preds = %[[VAL_42]] +// CHECK: reduce.switch.atomic: ; preds = %[[VAL_41]] // CHECK: unreachable -// CHECK: reduce.switch.nonatomic: ; preds = %[[VAL_42]] +// CHECK: reduce.switch.nonatomic: ; preds = %[[VAL_41]] // CHECK: %[[VAL_49:.*]] = load float, ptr %[[VAL_11]], align 4 // CHECK: %[[VAL_50:.*]] = load float, ptr %[[VAL_20]], align 4 // CHECK: %[[VAL_51:.*]] = fadd contract float %[[VAL_49]], %[[VAL_50]] // CHECK: store float %[[VAL_51]], ptr %[[VAL_11]], align 4 // CHECK: call void @__kmpc_end_reduce(ptr @1, i32 %[[VAL_44]], ptr @.gomp_critical_user_.reduction.var) // CHECK: br label %[[VAL_46]] -// CHECK: reduce.finalize: ; preds = %[[VAL_47]], %[[VAL_42]] +// CHECK: reduce.finalize: ; preds = %[[VAL_47]], %[[VAL_41]] // CHECK: %[[VAL_52:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: call void @__kmpc_barrier(ptr @2, i32 %[[VAL_52]]) // CHECK: br label 
%[[VAL_53:.*]] // CHECK: omp.region.cont: ; preds = %[[VAL_46]] // CHECK: br label %[[VAL_54:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_53]] -// CHECK: br label %[[VAL_55:.*]] +// CHECK: br label %[[FINI:.fini.*]] +// CHECK: [[FINI]]: +// CHECK: br label %[[EXIT:.*]] // CHECK: omp_section_loop.body: ; preds = %[[VAL_36]] // CHECK: %[[VAL_56:.*]] = add i32 %[[VAL_34]], %[[VAL_28]] // CHECK: %[[VAL_57:.*]] = mul i32 %[[VAL_56]], 1 @@ -144,8 +144,10 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.in // CHECK: omp_section_loop.inc: ; preds = %[[VAL_59]] // CHECK: %[[VAL_35]] = add nuw i32 %[[VAL_34]], 1 // CHECK: br label %[[VAL_32]] -// CHECK: omp.par.exit.exitStub: ; preds = %[[VAL_54]] +// CHECK: omp.par.exit.exitStub: ; preds = %[[FINI]] // CHECK: ret void + +// CHECK-LABEL: define internal void @.omp.reduction.func // CHECK: %[[VAL_70:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_71:.*]], i64 0, i64 0 // CHECK: %[[VAL_72:.*]] = load ptr, ptr %[[VAL_70]], align 8 // CHECK: %[[VAL_73:.*]] = load float, ptr %[[VAL_72]], align 4