diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 63493eb78c451..d49c23336146b 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2621,6 +2621,17 @@ struct AAICVTrackerCallSiteReturned : AAICVTracker { } }; +/// Determines if \p BB exits the function unconditionally itself or reaches a +/// block that does through only unique successors. +static bool hasFunctionEndAsUniqueSuccessor(const BasicBlock *BB) { + if (succ_empty(BB)) + return true; + const BasicBlock *const Successor = BB->getUniqueSuccessor(); + if (!Successor) + return false; + return hasFunctionEndAsUniqueSuccessor(Successor); +} + struct AAExecutionDomainFunction : public AAExecutionDomain { AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) : AAExecutionDomain(IRP, A) {} @@ -2676,17 +2687,19 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { if (!ED.EncounteredAssumes.empty() && !A.isModulePass()) return; - // We can remove this barrier, if it is one, or all aligned barriers - // reaching the kernel end. In the latter case we can transitively work - // our way back until we find a barrier that guards a side-effect if we - // are dealing with the kernel end here. + // We can remove this barrier, if it is one, or aligned barriers reaching + // the kernel end (if CB is nullptr). Aligned barriers reaching the kernel + // end should only be removed if the kernel end is their unique successor; + // otherwise, they may have side-effects that aren't accounted for in the + // kernel end in their other successors. If those barriers have other + // barriers reaching them, those can be transitively removed as well as + // long as the kernel end is also their unique successor. if (CB) { DeletedBarriers.insert(CB); A.deleteAfterManifest(*CB); ++NumBarriersEliminated; Changed = ChangeStatus::CHANGED; } else if (!ED.AlignedBarriers.empty()) { - NumBarriersEliminated += ED.AlignedBarriers.size(); Changed = ChangeStatus::CHANGED; SmallVector Worklist(ED.AlignedBarriers.begin(), ED.AlignedBarriers.end()); @@ -2697,7 +2710,10 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { continue; if (LastCB->getFunction() != getAnchorScope()) continue; + if (!hasFunctionEndAsUniqueSuccessor(LastCB->getParent())) + continue; if (!DeletedBarriers.count(LastCB)) { + ++NumBarriersEliminated; A.deleteAfterManifest(*LastCB); continue; } @@ -2978,7 +2994,7 @@ bool AAExecutionDomainFunction::handleCallees(Attributor &A, EntryBBED.IsExecutedByInitialThreadOnly = false; EntryBBED.IsReachedFromAlignedBarrierOnly = true; EntryBBED.EncounteredNonLocalSideEffect = false; - ExitED.IsReachingAlignedBarrierOnly = true; + ExitED.IsReachingAlignedBarrierOnly = false; } else { EntryBBED.IsExecutedByInitialThreadOnly = false; EntryBBED.IsReachedFromAlignedBarrierOnly = false; diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll index e45f746cbf439..8aca820afdfa8 100644 --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -1065,8 +1065,174 @@ define void @caller_barrier2() "kernel" { ret void } +define void @loop_barrier() "kernel" { +; CHECK-LABEL: define {{[^@]+}}@loop_barrier +; CHECK-SAME: () #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128 +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + call void @unknown() + call void @aligned_barrier() + %i.next = add nuw nsw i32 %i, 1 + %cond = icmp ne i32 %i.next, 128 + br i1 %cond, label %loop, label %exit + +exit: + ret void +} + +define void @loop_barrier_end_barriers() "kernel" { +; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers +; CHECK-SAME: () #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128 +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + call void @unknown() + call void @aligned_barrier() + %i.next = add nuw nsw i32 %i, 1 + %cond = icmp ne i32 %i.next, 128 + br i1 %cond, label %loop, label %exit + +exit: + call void @aligned_barrier() + call void @aligned_barrier() + call void @aligned_barrier() + call void @aligned_barrier() + ret void +} + +define void @loop_barrier_end_barriers_unknown() "kernel" { +; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown +; CHECK-SAME: () #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128 +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + call void @unknown() + call void @aligned_barrier() + %i.next = add nuw nsw i32 %i, 1 + %cond = icmp ne i32 %i.next, 128 + br i1 %cond, label %loop, label %exit + +exit: + call void @aligned_barrier() + call void @aligned_barrier() + call void @unknown() + call void @aligned_barrier() + call void @aligned_barrier() + ret void +} + +define void @loop_barrier_store() "kernel" { +; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store +; CHECK-SAME: () #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: store i32 [[I]], ptr @G1, align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128 +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + store i32 %i, ptr @G1 + call void @aligned_barrier() + %i.next = add nuw nsw i32 %i, 1 + %cond = icmp ne i32 %i.next, 128 + br i1 %cond, label %loop, label %exit + +exit: + ret void +} + +define void @loop_barrier_end_barriers_store() "kernel" { +; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store +; CHECK-SAME: () #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: store i32 [[I]], ptr @G1, align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128 +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: store i32 [[I_NEXT]], ptr @G1, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + store i32 %i, ptr @G1 + call void @aligned_barrier() + %i.next = add nuw nsw i32 %i, 1 + %cond = icmp ne i32 %i.next, 128 + br i1 %cond, label %loop, label %exit + +exit: + call void @aligned_barrier() + call void @aligned_barrier() + store i32 %i.next, ptr @G1 + call void @aligned_barrier() + call void @aligned_barrier() + ret void +} + !llvm.module.flags = !{!16,!15} -!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25} +!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25,!26,!27,!28,!29,!30} !0 = !{ptr @pos_empty_1, !"kernel", i32 1} !1 = !{ptr @pos_empty_2, !"kernel", i32 1} @@ -1079,6 +1245,11 @@ define void @caller_barrier2() "kernel" { !23 = !{ptr @pos_empty_8, !"kernel", i32 1} !24 = !{ptr @caller_barrier1, !"kernel", i32 1} !25 = !{ptr @caller_barrier2, !"kernel", i32 1} +!26 = !{ptr @loop_barrier, !"kernel", i32 1} +!27 = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1} +!28 = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1} +!29 = !{ptr @loop_barrier_store, !"kernel", i32 1} +!30 = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1} !6 = !{ptr @neg_empty_8, !"kernel", i32 1} !19 = !{ptr @neg_empty_9, !"kernel", i32 1} !20 = !{ptr @pos_empty_10, !"kernel", i32 1} @@ -1128,4 +1299,9 @@ define void @caller_barrier2() "kernel" { ; CHECK: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1} ; CHECK: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1} ; CHECK: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1} +; CHECK: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1} +; CHECK: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1} +; CHECK: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1} +; CHECK: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1} +; CHECK: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1} ;.