Skip to content

Commit

Permalink
[LoopIdiomRecognize] Support for converting loops that use LSHR to CTLZ.
Browse files Browse the repository at this point in the history
In the 'detectCTLZIdiom' function support for loops that use LSHR instruction instead of ASHR has been added.

This supports creating ctlz from the following code.

int lzcnt(int x) {
     int count = 0;
     while (x > 0)  {
          count++;
          x = x >> 1;
     }
    return count;
}

Patch by Olga Moldovanova

Differential Revision: https://reviews.llvm.org/D48354

llvm-svn: 336509
  • Loading branch information
topperc committed Jul 8, 2018
1 parent f1a981c commit 2835278
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 13 deletions.
34 changes: 21 additions & 13 deletions llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,9 @@ class LoopIdiomRecognize {
PHINode *CntPhi, Value *Var);
bool recognizeAndInsertCTLZ();
void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var, const DebugLoc &DL,
bool ZeroCheck, bool IsCntPhiUsedOutsideLoop);
PHINode *CntPhi, Value *Var, Instruction *DefX,
const DebugLoc &DL, bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop);

/// @}
};
Expand Down Expand Up @@ -1316,8 +1317,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
return false;

// step 2: detect instructions corresponding to "x.next = x >> 1"
// TODO: Support loops that use LShr.
if (!DefX || DefX->getOpcode() != Instruction::AShr)
if (!DefX || (DefX->getOpcode() != Instruction::AShr &&
DefX->getOpcode() != Instruction::LShr))
return false;
ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
if (!Shft || !Shft->isOne())
Expand Down Expand Up @@ -1401,8 +1402,7 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {

// Make sure the initial value can't be negative otherwise the ashr in the
// loop might never reach zero which would make the loop infinite.
// TODO: Support loops that use lshr and wouldn't need this check.
if (!isKnownNonNegative(InitX, *DL))
if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL))
return false;

// If we check X != 0 before entering the loop we don't need a zero
Expand Down Expand Up @@ -1433,8 +1433,9 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
TargetTransformInfo::TCC_Basic)
return false;

transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX->getDebugLoc(),
ZeroCheck, IsCntPhiUsedOutsideLoop);
transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX,
DefX->getDebugLoc(), ZeroCheck,
IsCntPhiUsedOutsideLoop);
return true;
}

Expand Down Expand Up @@ -1547,7 +1548,8 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
Instruction *DefX, const DebugLoc &DL, bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop) {
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());

// Step 1: Insert the CTLZ instruction at the end of the preheader block
Expand All @@ -1558,10 +1560,16 @@ void LoopIdiomRecognize::transformLoopToCountable(
Builder.SetCurrentDebugLocation(DL);
Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;

if (IsCntPhiUsedOutsideLoop)
InitXNext = Builder.CreateAShr(InitX,
ConstantInt::get(InitX->getType(), 1));
else
if (IsCntPhiUsedOutsideLoop) {
if (DefX->getOpcode() == Instruction::AShr)
InitXNext =
Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1));
else if (DefX->getOpcode() == Instruction::LShr)
InitXNext =
Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
else
llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
Count = Builder.CreateSub(
Expand Down
162 changes: 162 additions & 0 deletions llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,52 @@ while.end: ; preds = %while.end.loopexit,
ret i32 %i.0.lcssa
}

; Recognize CTLZ builtin pattern.
; Here it will replace the loop -
; assume builtin is always profitable.
;
; int ctlz_zero_check_lshr(int n)
; {
; int i = 0;
; while(n) {
; n >>= 1;
; i++;
; }
; return i;
; }
;
; ALL: entry
; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
; ALL-NEXT: %1 = sub i32 32, %0
; ALL: %inc.lcssa = phi i32 [ %1, %while.body ]
; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
; ALL: ret i32 %i.0.lcssa

; Function Attrs: norecurse nounwind readnone uwtable
define i32 @ctlz_zero_check_lshr(i32 %n) {
entry:
%tobool4 = icmp eq i32 %n, 0
br i1 %tobool4, label %while.end, label %while.body.preheader

while.body.preheader: ; preds = %entry
br label %while.body

while.body: ; preds = %while.body.preheader, %while.body
%i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
%n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
%shr = lshr i32 %n.addr.05, 1
%inc = add nsw i32 %i.06, 1
%tobool = icmp eq i32 %shr, 0
br i1 %tobool, label %while.end.loopexit, label %while.body

while.end.loopexit: ; preds = %while.body
br label %while.end

while.end: ; preds = %while.end.loopexit, %entry
%i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
ret i32 %i.0.lcssa
}

; Recognize CTLZ builtin pattern.
; Here it will replace the loop -
; assume builtin is always profitable.
Expand Down Expand Up @@ -157,6 +203,44 @@ while.end: ; preds = %while.cond
ret i32 %i.0
}

; Recognize CTLZ builtin pattern.
; Here it will replace the loop -
; assume builtin is always profitable.
;
; int ctlz_lshr(int n)
; {
; int i = 0;
; while(n >>= 1) {
; i++;
; }
; return i;
; }
;
; ALL: entry
; ALL: %0 = lshr i32 %n, 1
; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
; ALL-NEXT: %2 = sub i32 32, %1
; ALL-NEXT: %3 = add i32 %2, 1
; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
; ALL: ret i32 %i.0.lcssa

; Function Attrs: norecurse nounwind readnone uwtable
define i32 @ctlz_lshr(i32 %n) {
entry:
br label %while.cond

while.cond: ; preds = %while.cond, %entry
%n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
%shr = lshr i32 %n.addr.0, 1
%tobool = icmp eq i32 %shr, 0
%inc = add nsw i32 %i.0, 1
br i1 %tobool, label %while.end, label %while.cond

while.end: ; preds = %while.cond
ret i32 %i.0
}

; Recognize CTLZ builtin pattern.
; Here it will replace the loop -
; assume builtin is always profitable.
Expand Down Expand Up @@ -200,6 +284,45 @@ while.end: ; preds = %while.cond
ret i32 %i.0
}

; Recognize CTLZ builtin pattern.
; Here it will replace the loop -
; assume builtin is always profitable.
;
; int ctlz_add_lshr(int n, int i0)
; {
; int i = i0;
; while(n >>= 1) {
; i++;
; }
; return i;
; }
;
; ALL: entry
; ALL: %0 = lshr i32 %n, 1
; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
; ALL-NEXT: %2 = sub i32 32, %1
; ALL-NEXT: %3 = add i32 %2, 1
; ALL-NEXT: %4 = add i32 %2, %i0
; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ]
; ALL: ret i32 %i.0.lcssa
;
; Function Attrs: norecurse nounwind readnone uwtable
define i32 @ctlz_add_lshr(i32 %n, i32 %i0) {
entry:
br label %while.cond

while.cond: ; preds = %while.cond, %entry
%n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
%i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
%shr = lshr i32 %n.addr.0, 1
%tobool = icmp eq i32 %shr, 0
%inc = add nsw i32 %i.0, 1
br i1 %tobool, label %while.end, label %while.cond

while.end: ; preds = %while.cond
ret i32 %i.0
}

; Recognize CTLZ builtin pattern.
; Here it will replace the loop -
; assume builtin is always profitable.
Expand Down Expand Up @@ -245,6 +368,45 @@ while.end: ; preds = %while.cond
ret i32 %i.0
}

; Recognize CTLZ builtin pattern.
; Here it will replace the loop -
; assume builtin is always profitable.
;
; int ctlz_sext_lshr(short in)
; {
; int i = 0;
; while(in >>= 1) {
; i++;
; }
; return i;
; }
;
; ALL: entry
; ALL: %0 = lshr i32 %n, 1
; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
; ALL-NEXT: %2 = sub i32 32, %1
; ALL-NEXT: %3 = add i32 %2, 1
; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
; ALL: ret i32 %i.0.lcssa

; Function Attrs: norecurse nounwind readnone uwtable
define i32 @ctlz_sext_lshr(i16 %in) {
entry:
%n = sext i16 %in to i32
br label %while.cond

while.cond: ; preds = %while.cond, %entry
%n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
%shr = lshr i32 %n.addr.0, 1
%tobool = icmp eq i32 %shr, 0
%inc = add nsw i32 %i.0, 1
br i1 %tobool, label %while.end, label %while.cond

while.end: ; preds = %while.cond
ret i32 %i.0
}

; This loop contains a volatile store. If x is initially negative,
; the code will be an infinite loop because the ashr will eventually produce
; all ones and continue doing so. This prevents the loop from terminating. If
Expand Down

0 comments on commit 2835278

Please sign in to comment.