Skip to content

Commit

Permalink
[InstCombine] Shift amount reassociation (PR42391)
Browse files Browse the repository at this point in the history
Summary:
Given pattern:
`(x shiftopcode Q) shiftopcode K`
we should rewrite it as
`x shiftopcode (Q+K)`  iff `(Q+K) u< bitwidth(x)`
This is valid for any shift opcode, but both shifts must use the same opcode.

* https://rise4fun.com/Alive/9E2
* exact on both lshr => exact https://rise4fun.com/Alive/plHk
* exact on both ashr => exact https://rise4fun.com/Alive/QDAA
* nuw on both shl => nuw https://rise4fun.com/Alive/5Uk
* nsw on both shl => nsw https://rise4fun.com/Alive/0plg

Should fix [[ https://bugs.llvm.org/show_bug.cgi?id=42391 | PR42391]].

Reviewers: spatel, nikic, RKSimon

Reviewed By: nikic

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63812

llvm-svn: 364712
  • Loading branch information
LebedevRI committed Jun 29, 2019
1 parent fe107fc commit e3a94ba
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 48 deletions.
48 changes: 48 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,50 @@ using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

// Collapse two stacked shifts that use the same opcode into a single shift:
//   (x shiftopcode Q) shiftopcode K  -->  x shiftopcode (Q+K)
// The rewrite only happens when (Q+K) simplifies to an integer constant that
// is u< bitwidth(x). Any shift opcode is acceptable, as long as the outer and
// the inner shift agree on it.
//
// \p Sh0 is the outer shift; \p SQ is the query used to simplify the summed
// shift amount. Returns the newly created (not yet inserted) shift, or nullptr
// when the pattern does not apply.
static Instruction *
reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
                                             const SimplifyQuery &SQ) {
  // Pattern: (Base shiftopcode InnerAmt) shiftopcode OuterAmt
  // The inner shift itself is captured too, so its opcode/flags can be read.
  Value *Base, *InnerAmt, *InnerShiftV, *OuterAmt;
  const bool IsShiftOfShift =
      match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(Base), m_Value(InnerAmt)),
                                      m_Value(InnerShiftV)),
                         m_Value(OuterAmt)));
  if (!IsShiftOfShift)
    return nullptr;

  // Both shifts have to be of the very same kind.
  auto *Inner = cast<BinaryOperator>(InnerShiftV);
  const Instruction::BinaryOps Opcode = Sh0->getOpcode();
  if (Inner->getOpcode() != Opcode)
    return nullptr;

  // Ask the simplifier whether (OuterAmt + InnerAmt) folds to an existing
  // value; no new 'add' instruction is ever created here.
  const SimplifyQuery QueryAtOuter = SQ.getWithInstruction(Sh0);
  Value *SummedAmt = SimplifyBinOp(Instruction::BinaryOps::Add, OuterAmt,
                                   InnerAmt, QueryAtOuter);
  if (SummedAmt == nullptr)
    return nullptr; // The sum did not simplify.

  // The combined amount must stay strictly below the bit width of the
  // shifted value.
  // FIXME: could also rely on ConstantRange.
  const unsigned Width = Base->getType()->getScalarSizeInBits();
  const bool AmtInRange =
      match(SummedAmt, m_SpecificInt_ULT(APInt(Width, Width)));
  if (!AmtInRange)
    return nullptr;

  // Everything checks out - build the single replacement shift.
  BinaryOperator *Merged = BinaryOperator::Create(Opcode, Base, SummedAmt);

  // A flag survives only if it was present on both of the original shifts.
  if (Opcode != Instruction::BinaryOps::Shl) {
    // Right shifts (lshr/ashr) only carry 'exact'.
    const bool BothExact = Sh0->isExact() && Inner->isExact();
    Merged->setIsExact(BothExact);
  } else {
    // Left shifts carry the 'nuw'/'nsw' wrapping flags.
    const bool BothNUW = Sh0->hasNoUnsignedWrap() && Inner->hasNoUnsignedWrap();
    const bool BothNSW = Sh0->hasNoSignedWrap() && Inner->hasNoSignedWrap();
    Merged->setHasNoUnsignedWrap(BothNUW);
    Merged->setHasNoSignedWrap(BothNSW);
  }
  return Merged;
}

Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
assert(Op0->getType() == Op1->getType());
Expand All @@ -38,6 +82,10 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;

if (Instruction *NewShift =
reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))
return NewShift;

// (C1 shift (A add C2)) -> (C1 shift C2) shift A)
// iff A and C2 are both positive.
Value *A;
Expand Down
60 changes: 12 additions & 48 deletions llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@

define i32 @t0(i32 %x, i32 %y) {
; CHECK-LABEL: @t0(
; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2
; CHECK-NEXT: [[T3:%.*]] = lshr exact i32 [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = lshr i32 [[X:%.*]], 30
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = sub i32 32, %y
Expand All @@ -27,10 +24,7 @@ define i32 @t0(i32 %x, i32 %y) {

define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @t1_vec_splat(
; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> <i32 32, i32 32>, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[Y]], <i32 -2, i32 -2>
; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 30, i32 30>
; CHECK-NEXT: ret <2 x i32> [[T3]]
;
%t0 = sub <2 x i32> <i32 32, i32 32>, %y
Expand All @@ -42,10 +36,7 @@ define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) {

define <2 x i32> @t2_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @t2_vec_nonsplat(
; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> <i32 32, i32 30>, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[Y]], <i32 -2, i32 0>
; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 30, i32 30>
; CHECK-NEXT: ret <2 x i32> [[T3]]
;
%t0 = sub <2 x i32> <i32 32, i32 30>, %y
Expand All @@ -59,10 +50,7 @@ define <2 x i32> @t2_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {

define <3 x i32> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i32> %y) {
; CHECK-LABEL: @t3_vec_nonsplat_undef0(
; CHECK-NEXT: [[T0:%.*]] = sub <3 x i32> <i32 32, i32 undef, i32 32>, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[Y]], <i32 -2, i32 -2, i32 -2>
; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30>
; CHECK-NEXT: ret <3 x i32> [[T3]]
;
%t0 = sub <3 x i32> <i32 32, i32 undef, i32 32>, %y
Expand All @@ -74,10 +62,7 @@ define <3 x i32> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i32> %y) {

define <3 x i32> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) {
; CHECK-LABEL: @t4_vec_nonsplat_undef1(
; CHECK-NEXT: [[T0:%.*]] = sub <3 x i32> <i32 32, i32 32, i32 32>, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[Y]], <i32 -2, i32 undef, i32 -2>
; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30>
; CHECK-NEXT: ret <3 x i32> [[T3]]
;
%t0 = sub <3 x i32> <i32 32, i32 32, i32 32>, %y
Expand All @@ -89,10 +74,7 @@ define <3 x i32> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) {

define <3 x i32> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) {
; CHECK-LABEL: @t5_vec_nonsplat_undef1(
; CHECK-NEXT: [[T0:%.*]] = sub <3 x i32> <i32 32, i32 undef, i32 32>, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[Y]], <i32 -2, i32 undef, i32 -2>
; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30>
; CHECK-NEXT: ret <3 x i32> [[T3]]
;
%t0 = sub <3 x i32> <i32 32, i32 undef, i32 32>, %y
Expand All @@ -105,10 +87,7 @@ define <3 x i32> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) {
; Some other shift opcodes
define i32 @t6_shl(i32 %x, i32 %y) {
; CHECK-LABEL: @t6_shl(
; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = shl nuw i32 [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2
; CHECK-NEXT: [[T3:%.*]] = shl nsw i32 [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X:%.*]], 30
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = sub i32 32, %y
Expand All @@ -119,10 +98,7 @@ define i32 @t6_shl(i32 %x, i32 %y) {
}
define i32 @t7_ashr(i32 %x, i32 %y) {
; CHECK-LABEL: @t7_ashr(
; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = ashr exact i32 [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2
; CHECK-NEXT: [[T3:%.*]] = ashr i32 [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = ashr i32 [[X:%.*]], 30
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = sub i32 32, %y
Expand All @@ -135,10 +111,7 @@ define i32 @t7_ashr(i32 %x, i32 %y) {
; If the same flag is present on both shifts, it can be kept.
define i32 @t8_lshr_exact_flag_preservation(i32 %x, i32 %y) {
; CHECK-LABEL: @t8_lshr_exact_flag_preservation(
; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2
; CHECK-NEXT: [[T3:%.*]] = lshr exact i32 [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = lshr exact i32 [[X:%.*]], 30
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = sub i32 32, %y
Expand All @@ -149,10 +122,7 @@ define i32 @t8_lshr_exact_flag_preservation(i32 %x, i32 %y) {
}
define i32 @t9_ashr_exact_flag_preservation(i32 %x, i32 %y) {
; CHECK-LABEL: @t9_ashr_exact_flag_preservation(
; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = ashr exact i32 [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2
; CHECK-NEXT: [[T3:%.*]] = ashr exact i32 [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = ashr exact i32 [[X:%.*]], 30
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = sub i32 32, %y
Expand All @@ -163,10 +133,7 @@ define i32 @t9_ashr_exact_flag_preservation(i32 %x, i32 %y) {
}
define i32 @t10_shl_nuw_flag_preservation(i32 %x, i32 %y) {
; CHECK-LABEL: @t10_shl_nuw_flag_preservation(
; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = shl nuw i32 [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2
; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw i32 [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = shl nuw i32 [[X:%.*]], 30
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = sub i32 32, %y
Expand All @@ -177,10 +144,7 @@ define i32 @t10_shl_nuw_flag_preservation(i32 %x, i32 %y) {
}
define i32 @t11_shl_nsw_flag_preservation(i32 %x, i32 %y) {
; CHECK-LABEL: @t11_shl_nsw_flag_preservation(
; CHECK-NEXT: [[T0:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = shl nsw i32 [[X:%.*]], [[T0]]
; CHECK-NEXT: [[T2:%.*]] = add i32 [[Y]], -2
; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw i32 [[T1]], [[T2]]
; CHECK-NEXT: [[T3:%.*]] = shl nsw i32 [[X:%.*]], 30
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = sub i32 32, %y
Expand Down

0 comments on commit e3a94ba

Please sign in to comment.