Skip to content

Commit

Permalink
[InstCombine] allow peeking through zext of shift amount to match rotate idioms (PR45701)
Browse files Browse the repository at this point in the history

We might also want to allow a trunc of the shift amount, but that pattern seems less likely to occur.

  define i32 @src(i32 %x, i1 %y) {
  %0:
    %rem = and i1 %y, 1
    %cmp = icmp eq i1 %rem, 0
    %sh_prom = zext i1 %rem to i32
    %sub = sub nsw nuw i1 0, %rem
    %sh_prom1 = zext i1 %sub to i32
    %shr = lshr i32 %x, %sh_prom1
    %shl = shl i32 %x, %sh_prom
    %or = or i32 %shl, %shr
    %r = select i1 %cmp, i32 %x, i32 %or
    ret i32 %r
  }
  =>
  define i32 @tgt(i32 %x, i1 %y) {
  %0:
    %t = zext i1 %y to i32
    %r = fshl i32 %x, i32 %x, i32 %t
    ret i32 %r
  }

  Transformation seems to be correct!

https://alive2.llvm.org/ce/z/xgMvE3

http://bugs.llvm.org/PR45701
  • Loading branch information
rotateright committed Jul 20, 2020
1 parent 92ec0c5 commit 750f4c5
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 22 deletions.
12 changes: 8 additions & 4 deletions llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
Expand Up @@ -2297,16 +2297,19 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
/// funnel shift intrinsic. Example:
/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
/// --> call llvm.fshl.i32(a, a, b)
static Instruction *foldSelectRotate(SelectInst &Sel) {
static Instruction *foldSelectRotate(SelectInst &Sel,
InstCombiner::BuilderTy &Builder) {
// The false value of the select must be a rotate of the true value.
Value *Or0, *Or1;
if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
return nullptr;

Value *TVal = Sel.getTrueValue();
Value *SA0, *SA1;
if (!match(Or0, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA0)))) ||
!match(Or1, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA1)))))
if (!match(Or0, m_OneUse(m_LogicalShift(m_Specific(TVal),
m_ZExtOrSelf(m_Value(SA0))))) ||
!match(Or1, m_OneUse(m_LogicalShift(m_Specific(TVal),
m_ZExtOrSelf(m_Value(SA1))))))
return nullptr;

auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
Expand Down Expand Up @@ -2344,6 +2347,7 @@ static Instruction *foldSelectRotate(SelectInst &Sel) {
(ShAmt == SA1 && ShiftOpcode1 == BinaryOperator::Shl);
Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType());
ShAmt = Builder.CreateZExt(ShAmt, Sel.getType());
return IntrinsicInst::Create(F, { TVal, TVal, ShAmt });
}

Expand Down Expand Up @@ -2960,7 +2964,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI, *this))
return Select;

if (Instruction *Rot = foldSelectRotate(SI))
if (Instruction *Rot = foldSelectRotate(SI, Builder))
return Rot;

if (Instruction *Copysign = foldSelectToCopysign(SI, Builder))
Expand Down
22 changes: 4 additions & 18 deletions llvm/test/Transforms/InstCombine/rotate.ll
Expand Up @@ -691,15 +691,8 @@ define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) {

define i32 @rotl_select_zext_shamt(i32 %x, i8 %y) {
; CHECK-LABEL: @rotl_select_zext_shamt(
; CHECK-NEXT: [[REM:%.*]] = and i8 [[Y:%.*]], 31
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[REM]], 0
; CHECK-NEXT: [[SH_PROM:%.*]] = zext i8 [[REM]] to i32
; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i8 32, [[REM]]
; CHECK-NEXT: [[SH_PROM1:%.*]] = zext i8 [[SUB]] to i32
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], [[SH_PROM1]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X]], [[SH_PROM]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[OR]]
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[Y:%.*]] to i32
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[TMP1]])
; CHECK-NEXT: ret i32 [[R]]
;
%rem = and i8 %y, 31
Expand All @@ -716,15 +709,8 @@ define i32 @rotl_select_zext_shamt(i32 %x, i8 %y) {

define i64 @rotr_select_zext_shamt(i64 %x, i32 %y) {
; CHECK-LABEL: @rotr_select_zext_shamt(
; CHECK-NEXT: [[REM:%.*]] = and i32 [[Y:%.*]], 63
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 0
; CHECK-NEXT: [[SH_PROM:%.*]] = zext i32 [[REM]] to i64
; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], [[SH_PROM]]
; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 64, [[REM]]
; CHECK-NEXT: [[SH_PROM1:%.*]] = zext i32 [[SUB]] to i64
; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[X]], [[SH_PROM1]]
; CHECK-NEXT: [[OR:%.*]] = or i64 [[SHL]], [[SHR]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i64 [[X]], i64 [[OR]]
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[Y:%.*]] to i64
; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[TMP1]])
; CHECK-NEXT: ret i64 [[R]]
;
%rem = and i32 %y, 63
Expand Down

0 comments on commit 750f4c5

Please sign in to comment.