Skip to content

Commit

Permalink
[InstCombine] Optimize shl+lshr+and conversion pattern
Browse files Browse the repository at this point in the history
if `C1` and `C3` are pow2 and `Log2(C3)+C2 < BitWidth`:
    ((C1 << X) >> C2) & C3 -> X == (Log2(C3)+C2-Log2(C1)) ? C3 : 0;

https://alive2.llvm.org/ce/z/Pus5bd

Fix issue #55739

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D126617
  • Loading branch information
bcl5980 committed Jun 10, 2022
1 parent 51a41f2 commit de7a6ae
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 31 deletions.
24 changes: 24 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1904,6 +1904,30 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return new ZExtInst(NewBO, Ty);
}
}

Constant *C1, *C2;
const APInt *C3 = C;
Value *X;
if (C3->isPowerOf2() &&
match(Op0, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1), m_Value(X)),
m_ImmConstant(C2)))) &&
match(C1, m_Power2())) {
Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1);
Constant *Log2C3 = ConstantInt::get(Ty, C3->countTrailingZeros());
Constant *LshrC = ConstantExpr::getAdd(C2, Log2C3);
KnownBits KnownLShrc = computeKnownBits(LshrC, 0, nullptr);
if (KnownLShrc.getMaxValue().ult(Width)) {
// iff C1 and C3 are pow2 and C2 + cttz(C3) < BitWidth:
// ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0
Constant *CmpC = ConstantExpr::getSub(LshrC, Log2C1);
Value *Cmp = Builder.CreateICmpEQ(X, CmpC);
return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3),
ConstantInt::getNullValue(Ty));
}
// TODO: Symmetrical case
// iff C1 and C3 are pow2 and cttz(C3) >= C2:
// ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0
}
}

if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))),
Expand Down
28 changes: 13 additions & 15 deletions llvm/test/Transforms/InstCombine/and.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1778,9 +1778,8 @@ define i8 @not_lshr_bitwidth_mask(i8 %x, i8 %y) {

define i16 @shl_lshr_pow2_const_case1(i16 %x) {
; CHECK-LABEL: @shl_lshr_pow2_const_case1(
; CHECK-NEXT: [[SHL:%.*]] = shl i16 4, [[X:%.*]]
; CHECK-NEXT: [[LSHR:%.*]] = lshr i16 [[SHL]], 6
; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 8
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0
; CHECK-NEXT: ret i16 [[R]]
;
%shl = shl i16 4, %x
Expand All @@ -1791,9 +1790,8 @@ define i16 @shl_lshr_pow2_const_case1(i16 %x) {

define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) {
; CHECK-LABEL: @shl_lshr_pow2_const_case1_uniform_vec(
; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> <i16 4, i16 4, i16 4>, [[X:%.*]]
; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 6, i16 6, i16 6>
; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 7, i16 7, i16 7>
; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
; CHECK-NEXT: ret <3 x i16> [[R]]
;
%shl = shl <3 x i16> <i16 4, i16 4, i16 4>, %x
Expand All @@ -1804,22 +1802,20 @@ define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) {

define <3 x i16> @shl_lshr_pow2_const_case1_non_uniform_vec(<3 x i16> %x) {
; CHECK-LABEL: @shl_lshr_pow2_const_case1_non_uniform_vec(
; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> <i16 16, i16 8, i16 4>, [[X:%.*]]
; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 5, i16 4, i16 3>
; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 16, i16 4>
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 7, i16 6, i16 1>
; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
; CHECK-NEXT: ret <3 x i16> [[R]]
;
%shl = shl <3 x i16> <i16 16, i16 8, i16 4>, %x
%lshr = lshr <3 x i16> %shl, <i16 5, i16 4, i16 3>
%r = and <3 x i16> %lshr, <i16 8, i16 16, i16 4>
%shl = shl <3 x i16> <i16 2, i16 8, i16 32>, %x
%lshr = lshr <3 x i16> %shl, <i16 5, i16 6, i16 3>
%r = and <3 x i16> %lshr, <i16 8, i16 8, i16 8>
ret <3 x i16> %r
}

define <3 x i16> @shl_lshr_pow2_const_case1_undef1_vec(<3 x i16> %x) {
; CHECK-LABEL: @shl_lshr_pow2_const_case1_undef1_vec(
; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> <i16 undef, i16 16, i16 16>, [[X:%.*]]
; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 5, i16 5, i16 5>
; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 8, i16 4, i16 4>
; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
; CHECK-NEXT: ret <3 x i16> [[R]]
;
%shl = shl <3 x i16> <i16 undef, i16 16, i16 16>, %x
Expand Down Expand Up @@ -1868,6 +1864,8 @@ define i16 @shl_lshr_pow2_const_case2(i16 %x) {
ret i16 %r
}

; TODO: this pattern can be transformed to icmp+select

define i16 @shl_lshr_pow2_not_const_case2(i16 %x) {
; CHECK-LABEL: @shl_lshr_pow2_not_const_case2(
; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2, [[X:%.*]]
Expand Down
26 changes: 10 additions & 16 deletions llvm/test/Transforms/InstCombine/icmp-and-shift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,9 @@ define <2 x i32> @icmp_ne_and_pow2_shl1_vec(<2 x i32> %0) {

define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) {
; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2(
; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]]
; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[SHL]], 4
; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1
; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1
; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 3
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[TMP3]]
;
%shl = shl i32 2, %0
%and = and i32 %shl, 16
Expand All @@ -72,11 +70,9 @@ define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) {

define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) {
; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_vec(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 4, i32 4>, [[TMP0:%.*]]
; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[SHL]], <i32 4, i32 4>
; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 2, i32 2>
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%shl = shl <2 x i32> <i32 4, i32 4>, %0
%and = and <2 x i32> %shl, <i32 16, i32 16>
Expand All @@ -87,9 +83,8 @@ define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) {

define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) {
; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2(
; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]]
; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[SHL]], 4
; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0:%.*]], 3
; CHECK-NEXT: [[AND_LOBIT:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[AND_LOBIT]]
;
%shl = shl i32 2, %0
Expand All @@ -101,9 +96,8 @@ define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) {

define <2 x i32> @icmp_ne_and_pow2_shl_pow2_vec(<2 x i32> %0) {
; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2_vec(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 4, i32 4>, [[TMP0:%.*]]
; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[SHL]], <i32 4, i32 4>
; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 2, i32 2>
; CHECK-NEXT: [[AND_LOBIT:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]]
;
%shl = shl <2 x i32> <i32 4, i32 4>, %0
Expand Down

0 comments on commit de7a6ae

Please sign in to comment.