61 changes: 24 additions & 37 deletions llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
Optional<bool> UserAllowProfileBasedPeeling,
Optional<unsigned> UserFullUnrollMaxCount) {
TargetTransformInfo::UnrollingPreferences UP;

// Set up the defaults
Expand All @@ -204,6 +206,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.PartialThreshold = 150;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
UP.PeelCount = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = std::numeric_limits<unsigned>::max();
UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
Expand All @@ -215,7 +218,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
UP.AllowPeeling = true;
UP.AllowLoopNestsPeeling = false;
UP.UnrollAndJam = false;
UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;

Expand Down Expand Up @@ -243,6 +249,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxCount = UnrollMaxCount;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
UP.FullUnrollMaxCount = UnrollFullMaxCount;
if (UnrollPeelCount.getNumOccurrences() > 0)
UP.PeelCount = UnrollPeelCount;
if (UnrollAllowPartial.getNumOccurrences() > 0)
UP.Partial = UnrollAllowPartial;
if (UnrollAllowRemainder.getNumOccurrences() > 0)
Expand All @@ -251,6 +259,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Runtime = UnrollRuntime;
if (UnrollMaxUpperBound == 0)
UP.UpperBound = false;
if (UnrollAllowPeeling.getNumOccurrences() > 0)
UP.AllowPeeling = UnrollAllowPeeling;
if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
if (UnrollUnrollRemainder.getNumOccurrences() > 0)
UP.UnrollRemainder = UnrollUnrollRemainder;
if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
Expand All @@ -269,39 +281,16 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Runtime = *UserRuntime;
if (UserUpperBound.hasValue())
UP.UpperBound = *UserUpperBound;
if (UserAllowPeeling.hasValue())
UP.AllowPeeling = *UserAllowPeeling;
if (UserAllowProfileBasedPeeling.hasValue())
UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
if (UserFullUnrollMaxCount.hasValue())
UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;

return UP;
}

/// Assemble the peeling preferences for loop \p L.
///
/// Values are applied in three layers, each one overriding the previous:
///   1. whatever the target writes through TTI.getPeelingPreferences,
///   2. the -unroll-* command-line flags that were explicitly given,
///   3. the optional values handed in by the caller.
///
/// \param L    Loop the preferences are gathered for (forwarded to the target).
/// \param SE   ScalarEvolution instance forwarded to the target hook.
/// \param TTI  Target hook provider.
/// \param UserAllowPeeling  When set, overrides AllowPeeling.
/// \param UserAllowProfileBasedPeeling  When set, overrides
///        PeelProfiledIterations.
/// \returns the resulting preferences by value.
TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
    Optional<bool> UserAllowPeeling,
    Optional<bool> UserAllowProfileBasedPeeling) {
  TargetTransformInfo::PeelingPreferences Prefs;

  // Layer 1: target-specific values.
  TTI.getPeelingPreferences(L, SE, Prefs);

  // A flag only takes effect when it actually appeared on the command line;
  // its default value must not clobber what the target chose.
  const auto GivenOnCommandLine = [](auto &Flag) {
    return Flag.getNumOccurrences() > 0;
  };

  // Layer 2: values specified through cl::opt flags.
  if (GivenOnCommandLine(UnrollPeelCount))
    Prefs.PeelCount = UnrollPeelCount;
  if (GivenOnCommandLine(UnrollAllowPeeling))
    Prefs.AllowPeeling = UnrollAllowPeeling;
  if (GivenOnCommandLine(UnrollAllowLoopNestsPeeling))
    Prefs.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;

  // Layer 3: values supplied by the caller as arguments. These are applied
  // last, so they win over both the target and the flags.
  if (UserAllowPeeling)
    Prefs.AllowPeeling = *UserAllowPeeling;
  if (UserAllowProfileBasedPeeling)
    Prefs.PeelProfiledIterations = *UserAllowProfileBasedPeeling;

  return Prefs;
}

namespace {

/// A struct to densely store the state of an instruction after unrolling at
Expand Down Expand Up @@ -772,8 +761,7 @@ bool llvm::computeUnrollCount(
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {

// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
Expand Down Expand Up @@ -875,8 +863,8 @@ bool llvm::computeUnrollCount(
}

// 4th priority is loop peeling.
computePeelCount(L, LoopSize, UP, PP, TripCount, SE);
if (PP.PeelCount) {
computePeelCount(L, LoopSize, UP, TripCount, SE);
if (UP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
return ExplicitUnroll;
Expand Down Expand Up @@ -1079,9 +1067,8 @@ static LoopUnrollResult tryToUnrollLoop(
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
ProvidedFullUnrollMaxCount);
TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling);

// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
// as threshold later on.
Expand Down Expand Up @@ -1155,7 +1142,7 @@ static LoopUnrollResult tryToUnrollLoop(
bool UseUpperBound = false;
bool IsCountSetExplicitly = computeUnrollCount(
L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
TripMultiple, LoopSize, UP, PP, UseUpperBound);
TripMultiple, LoopSize, UP, UseUpperBound);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
// Unroll factor (Count) must be less or equal to TripCount.
Expand All @@ -1170,7 +1157,7 @@ static LoopUnrollResult tryToUnrollLoop(
LoopUnrollResult UnrollResult = UnrollLoop(
L,
{UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
UseUpperBound, MaxOrZero, TripMultiple, PP.PeelCount, UP.UnrollRemainder,
UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
ForgetAllSCEV},
LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
if (UnrollResult == LoopUnrollResult::Unmodified)
Expand Down Expand Up @@ -1202,7 +1189,7 @@ static LoopUnrollResult tryToUnrollLoop(
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
(IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount)))
(IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
L->setLoopAlreadyUnrolled();

return UnrollResult;
Expand Down
25 changes: 12 additions & 13 deletions llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,34 +279,33 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
TargetTransformInfo::PeelingPreferences &PP,
unsigned &TripCount, ScalarEvolution &SE) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
unsigned TargetPeelCount = PP.PeelCount;
PP.PeelCount = 0;
// Save the UP.PeelCount value set by the target in
// TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
unsigned TargetPeelCount = UP.PeelCount;
UP.PeelCount = 0;
if (!canPeel(L))
return;

// Only try to peel innermost loops by default.
// The constraint can be relaxed by the target in TTI.getUnrollingPreferences
// or by the flag -unroll-allow-loop-nests-peeling.
if (!PP.AllowLoopNestsPeeling && !L->empty())
if (!UP.AllowLoopNestsPeeling && !L->empty())
return;

// If the user provided a peel count, use that.
bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
if (UserPeelCount) {
LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
<< " iterations.\n");
PP.PeelCount = UnrollForcePeelCount;
PP.PeelProfiledIterations = true;
UP.PeelCount = UnrollForcePeelCount;
UP.PeelProfiledIterations = true;
return;
}

// Skip peeling if it's disabled.
if (!PP.AllowPeeling)
if (!UP.AllowPeeling)
return;

unsigned AlreadyPeeled = 0;
Expand Down Expand Up @@ -355,8 +354,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
<< " iteration(s) to turn"
<< " some Phis into invariants.\n");
PP.PeelCount = DesiredPeelCount;
PP.PeelProfiledIterations = false;
UP.PeelCount = DesiredPeelCount;
UP.PeelProfiledIterations = false;
return;
}
}
Expand All @@ -368,7 +367,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
return;

// Do not apply profile-based peeling if it is disabled.
if (!PP.PeelProfiledIterations)
if (!UP.PeelProfiledIterations)
return;
// If we don't know the trip count, but have reason to believe the average
// trip count is low, peeling should be beneficial, since we will usually
Expand All @@ -388,7 +387,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
<< " iterations.\n");
PP.PeelCount = *PeelCount;
UP.PeelCount = *PeelCount;
return;
}
LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
Expand Down
45 changes: 45 additions & 0 deletions llvm/test/Transforms/SimplifyCFG/pr46638.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -simplifycfg < %s | FileCheck %s

; Input for the SimplifyCFG run. %true1 is entered from both the entry block
; and %false1, and it branches a second time on the same condition %c that
; the entry block already branched on.
; The autogenerated assertions below expect the body of %true1 to show up
; twice in the output: once appended to %false1 (which then returns) and once
; in a new %true2.critedge block (which goes on to call @dummy(i32 2)). The
; `icmp sgt` is still present in both copies.
; NOTE(review): presumably this guards the interaction between the
; llvm.assume in the entry block and that duplication -- confirm against the
; PR46638 report.
define void @pr46638(i1 %c, i32 %x) {
; CHECK-LABEL: @pr46638(
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]])
; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]]
; CHECK: false1:
; CHECK-NEXT: call void @dummy(i32 1)
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 0
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP2]] to i32
; CHECK-NEXT: call void @dummy(i32 [[EXT]])
; CHECK-NEXT: ret void
; CHECK: true2.critedge:
; CHECK-NEXT: [[CMP2_C:%.*]] = icmp sgt i32 [[X]], 0
; CHECK-NEXT: [[EXT_C:%.*]] = zext i1 [[CMP2_C]] to i32
; CHECK-NEXT: call void @dummy(i32 [[EXT_C]])
; CHECK-NEXT: call void @dummy(i32 2)
; CHECK-NEXT: ret void
;
; Entry: %cmp1 (signed x < 0) is asserted to hold via llvm.assume, then
; control splits on %c.
%cmp1 = icmp slt i32 %x, 0
call void @llvm.assume(i1 %cmp1)
br i1 %c, label %true1, label %false1

; Shared block: %cmp2 (signed x > 0) cannot hold together with the assumed
; %cmp1. Its zero-extended value is passed to @dummy before branching on %c
; again.
true1:
%cmp2 = icmp sgt i32 %x, 0
%ext = zext i1 %cmp2 to i32
call void @dummy(i32 %ext)
br i1 %c, label %true2, label %false2

; Reached only when %c is false; calls @dummy(i32 1) and then joins %true1.
false1:
call void @dummy(i32 1)
br label %true1

; Taken when the second branch on %c is true.
true2:
call void @dummy(i32 2)
ret void

; Taken when the second branch on %c is false.
false2:
ret void
}

declare void @dummy(i32)
declare void @llvm.assume(i1)