Skip to content

Commit

Permalink
Reroll loops with multiple IV and negative step part 3
Browse files Browse the repository at this point in the history
            support multiple induction variables

    This patch enable loop reroll for the following case:
        for(int i=0;  i<N; i += 2) {
           S += *a++;
           S += *a++;
        };

Differential Revision: http://reviews.llvm.org/D16550

llvm-svn: 268147
  • Loading branch information
Lawrence Hu committed Apr 30, 2016
1 parent df29078 commit 1befea2
Show file tree
Hide file tree
Showing 2 changed files with 289 additions and 9 deletions.
164 changes: 155 additions & 9 deletions llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
Expand Up @@ -163,6 +163,9 @@ namespace {

// Map between induction variable and its increment
DenseMap<Instruction *, int64_t> IVToIncMap;
// For loop with multiple induction variable, remember the one used only to
// control the loop.
Instruction *LoopControlIV;

// A chain of isomorphic instructions, identified by a single-use PHI
// representing a reduction. Only the last value may be used outside the
Expand Down Expand Up @@ -350,9 +353,11 @@ namespace {
ScalarEvolution *SE, AliasAnalysis *AA,
TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA,
DenseMap<Instruction *, int64_t> &IncrMap)
DenseMap<Instruction *, int64_t> &IncrMap,
Instruction *LoopCtrlIV)
: Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
LoopControlIV(LoopCtrlIV) {}

/// Stage 1: Find all the DAG roots for the induction variable.
bool findRoots();
Expand Down Expand Up @@ -391,6 +396,7 @@ namespace {
UsesTy::iterator Start,
UsesTy::iterator End);
void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
void updateNonLoopCtrlIncr();

LoopReroll *Parent;

Expand Down Expand Up @@ -421,8 +427,18 @@ namespace {
UsesTy Uses;
// Map between induction variable and its increment
DenseMap<Instruction *, int64_t> &IVToIncMap;
Instruction *LoopControlIV;
};

// Check if it is a compare-like instruction whose user is a branch
bool isCompareUsedByBranch(Instruction *I) {
auto *TI = I->getParent()->getTerminator();
if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
return false;
return I->hasOneUse() && TI->getOperand(0) == I;
};

bool isLoopControlIV(Loop *L, Instruction *IV);
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
void collectPossibleReductions(Loop *L,
ReductionTracker &Reductions);
Expand Down Expand Up @@ -494,6 +510,60 @@ static const SCEVConstant *getIncrmentFactorSCEV(ScalarEvolution *SE,
return CIncSCEV;
}

// Check if an IV is only used to control the loop. There are two cases:
// 1. It only has one use which is loop increment, and the increment is only
// used by comparison and the PHI, and the comparison is only used by branch.
// 2. It is used by loop increment and the comparison, the loop increment is
// only used by the PHI, and the comparison is used only by the branch.
bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {

unsigned IVUses = IV->getNumUses();
if (IVUses != 2 && IVUses != 1)
return false;

for (auto *User : IV->users()) {
int32_t IncOrCmpUses = User->getNumUses();
bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));

// User can only have one or two uses.
if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
return false;

// Case 1
if (IVUses == 1) {
// The only user must be the loop increment.
// The loop increment must have two uses.
if (IsCompInst || IncOrCmpUses != 2)
return false;
}

// Case 2
if (IVUses == 2 && IncOrCmpUses != 1)
return false;

// The users of the IV must be a binary operation or a comparison
if (auto *BO = dyn_cast<BinaryOperator>(User)) {
if (BO->getOpcode() == Instruction::Add) {
// Loop Increment
// User of Loop Increment should be either PHI or CMP
for (auto *UU : User->users()) {
if (PHINode *PN = dyn_cast<PHINode>(UU)) {
if (PN != IV)
return false;
}
// Must be a CMP
else if (!isCompareUsedByBranch(dyn_cast<Instruction>(UU)))
return false;
}
} else
return false;
// Compare : can only have one use, and must be branch
} else if (!IsCompInst)
return false;
}
return true;
}

// Collect the list of loop induction variables with respect to which it might
// be possible to reroll the loop.
void LoopReroll::collectPossibleIVs(Loop *L,
Expand Down Expand Up @@ -525,7 +595,14 @@ void LoopReroll::collectPossibleIVs(Loop *L,
IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
<< "\n");
PossibleIVs.push_back(&*I);

if (isLoopControlIV(L, &*I)) {
assert(!LoopControlIV && "Found two loop control only IV");
LoopControlIV = &(*I);
DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
<< *PHISCEV << "\n");
} else
PossibleIVs.push_back(&*I);
}
}
}
Expand Down Expand Up @@ -1072,6 +1149,28 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
Uses[I].set(IL_All);
}

// Make sure we mark loop-control-only PHIs as used in all iterations. See
// comment above LoopReroll::isLoopControlIV for more information.
BasicBlock *Header = L->getHeader();
if (LoopControlIV && LoopControlIV != IV) {
for (auto *U : LoopControlIV->users()) {
Instruction *IVUser = dyn_cast<Instruction>(U);
// IVUser could be loop increment or compare
Uses[IVUser].set(IL_All);
for (auto *UU : IVUser->users()) {
Instruction *UUser = dyn_cast<Instruction>(UU);
// UUser could be compare, PHI or branch
Uses[UUser].set(IL_All);
// Is UUser a compare instruction?
if (UU->hasOneUse()) {
Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
if (BI == cast<BranchInst>(Header->getTerminator()))
Uses[BI].set(IL_All);
}
}
}
}

// Make sure all instructions in the loop are in one and only one
// set.
for (auto &KV : Uses) {
Expand Down Expand Up @@ -1314,25 +1413,65 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
++J;
}

// We need to create a new induction variable for each different BaseInst.
for (auto &DRS : RootSets)
// Insert the new induction variable.
replaceIV(DRS.BaseInst, IV, IterCount);
bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;

if (HasTwoIVs) {
updateNonLoopCtrlIncr();
replaceIV(LoopControlIV, LoopControlIV, IterCount);
} else
// We need to create a new induction variable for each different BaseInst.
for (auto &DRS : RootSets)
// Insert the new induction variable.
replaceIV(DRS.BaseInst, IV, IterCount);

SimplifyInstructionsInBlock(Header, TLI);
DeleteDeadPHIs(Header, TLI);
}

// For non-loop-control IVs, we only need to update the last increment
// with right amount, then we are done.
void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
const SCEV *NewInc = nullptr;
for (auto *LoopInc : LoopIncs) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LoopInc);
const SCEVConstant *COp = nullptr;
if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
} else {
COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
if (!COp)
COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
}

assert(COp && "Didn't find constant operand of LoopInc!\n");

const APInt &AInt = COp->getValue()->getValue();
const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
if (AInt.isNegative()) {
NewInc = SE->getNegativeSCEV(COp);
NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
NewInc = SE->getNegativeSCEV(NewInc);
} else
NewInc = SE->getUDivExpr(COp, ScaleSCEV);

LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
}
}

void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
Instruction *InstIV,
const SCEV *IterCount) {
BasicBlock *Header = L->getHeader();
int64_t Inc = IVToIncMap[InstIV];
bool Negative = Inc < 0;
bool NeedNewIV = InstIV == LoopControlIV;
bool Negative = !NeedNewIV && Inc < 0;

const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
const SCEV *Start = RealIVSCEV->getStart();

if (NeedNewIV)
Start = SE->getConstant(Start->getType(), 0);

const SCEV *SizeOfExpr = nullptr;
const SCEV *IncrExpr =
SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
Expand Down Expand Up @@ -1360,6 +1499,12 @@ void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
if (Uses[BI].find_first() == IL_All) {
const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);

if (NeedNewIV)
ICSCEV = SE->getMulExpr(IterCount,
SE->getConstant(IterCount->getType(), Scale));
else
ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);

// Iteration count SCEV minus or plus 1
const SCEV *MinusPlus1SCEV =
SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
Expand Down Expand Up @@ -1514,7 +1659,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount,
ReductionTracker &Reductions) {
DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
IVToIncMap);
IVToIncMap, LoopControlIV);

if (!DAGRoots.findRoots())
return false;
Expand Down Expand Up @@ -1566,6 +1711,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// reroll (there may be several possible options).
SmallInstructionVector PossibleIVs;
IVToIncMap.clear();
LoopControlIV = nullptr;
collectPossibleIVs(L, PossibleIVs);

if (PossibleIVs.empty()) {
Expand Down
134 changes: 134 additions & 0 deletions llvm/test/Transforms/LoopReroll/complex_reroll.ll
@@ -0,0 +1,134 @@
; RUN: opt -S -loop-reroll %s | FileCheck %s
declare i32 @goo(i32, i32)

@buf = external global i8*
@aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1

define i32 @test1(i32 %len) {
entry:
br label %while.body

while.body:
;CHECK-LABEL: while.body:
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ]
;CHECK-NEXT: %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ]
;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1
;CHECK-NEXT: %conv = zext i8 [[T2]] to i64
;CHECK-NEXT: %add = add i64 %conv, %sum44.020
;CHECK-NEXT: %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1
;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 1
;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body

%dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ]
%buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
%sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ]
%0 = load i8, i8* %buf.021, align 1
%conv = zext i8 %0 to i64
%add = add i64 %conv, %sum44.020
%arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1
%1 = load i8, i8* %arrayidx1, align 1
%conv2 = zext i8 %1 to i64
%add3 = add i64 %add, %conv2
%arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2
%2 = load i8, i8* %arrayidx4, align 1
%conv5 = zext i8 %2 to i64
%add6 = add i64 %add3, %conv5
%arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3
%3 = load i8, i8* %arrayidx7, align 1
%conv8 = zext i8 %3 to i64
%add9 = add i64 %add6, %conv8
%add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4
%dec = add nsw i32 %dec22, -1
%tobool = icmp eq i32 %dec, 0
br i1 %tobool, label %while.end, label %while.body

while.end: ; preds = %while.body
%conv11 = trunc i64 %add9 to i32
%call = tail call i32 @goo(i32 0, i32 %conv11)
unreachable
}

define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) {
entry:
%cmp.9 = icmp sgt i32 %N, 0
br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:
br label %for.body

for.cond.for.cond.cleanup_crit_edge:
br label %for.cond.cleanup

for.cond.cleanup:
%S.addr.0.lcssa = phi i32 [ %add2, %for.cond.for.cond.cleanup_crit_edge ], [ %S, %entry ]
ret i32 %S.addr.0.lcssa

for.body:
;CHECK-LABEL: for.body:
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ]
;CHECK-NEXT: %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
;CHECK-NEXT: %4 = load i32, i32* %a.addr.010, align 4
;CHECK-NEXT: %add = add nsw i32 %4, %S.addr.011
;CHECK-NEXT: %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1
;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body

%i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
%S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ]
%a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
%incdec.ptr = getelementptr inbounds i32, i32* %a.addr.010, i64 1
%0 = load i32, i32* %a.addr.010, align 4
%add = add nsw i32 %0, %S.addr.011
%incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 2
%1 = load i32, i32* %incdec.ptr, align 4
%add2 = add nsw i32 %add, %1
%add3 = add nsw i32 %i.012, 2
%cmp = icmp slt i32 %add3, %N
br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
}

define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 {
entry:
%cmp10 = icmp sgt i32 %len, 1
br i1 %cmp10, label %while.body.preheader, label %while.end

while.body.preheader: ; preds = %entry
br label %while.body

while.body: ; preds = %while.body.preheader, %while.body
;CHECK-LABEL: while.body:
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
;CHECK-NEXT: %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
;CHECK-NEXT: %4 = load i32, i32* %buf.addr.011, align 4
;CHECK-NEXT: %add = add nsw i32 %4, %S.012
;CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body

%i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ]
%S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
%buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
%0 = load i32, i32* %buf.addr.011, align 4
%add = add nsw i32 %0, %S.012
%arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
%1 = load i32, i32* %arrayidx1, align 4
%add2 = add nsw i32 %add, %1
%add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -2
%sub = add nsw i32 %i.013, -2
%cmp = icmp sgt i32 %sub, 1
br i1 %cmp, label %while.body, label %while.end.loopexit

while.end.loopexit: ; preds = %while.body
br label %while.end

while.end: ; preds = %while.end.loopexit, %entry
%S.0.lcssa = phi i32 [ undef, %entry ], [ %add2, %while.end.loopexit ]
ret i32 %S.0.lcssa
}

0 comments on commit 1befea2

Please sign in to comment.