[InstCombine] Prefer -(x & 1) as the low bit splatting pattern (PR51305)

Both patterns are equivalent (https://alive2.llvm.org/ce/z/jfCViF),
so we should have a preference. It seems like mask+negation is better
than two shifts.
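
For illustration only (not part of the commit), the two canonical forms can be compared as C++ helpers; the helper names are hypothetical, and the shift version assumes the usual two's-complement narrowing conversion. Both splat the low bit of x into every bit of the result:

#include <cstdint>

// Old form: move the low bit into the sign position, then arithmetic-shift
// it back down, replicating it across the whole word.
int32_t splatLowBitShifts(int32_t x) {
  return (int32_t)((uint32_t)x << 31) >> 31;
}

// Preferred form: isolate the low bit and negate it.
int32_t splatLowBitMask(int32_t x) {
  return -(x & 1);
}

Either helper returns 0 when x is even and -1 (all ones) when x is odd.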
LebedevRI committed Aug 7, 2021
1 parent d88d279 commit e718705
Showing 4 changed files with 39 additions and 21 deletions.
17 changes: 16 additions & 1 deletion llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1346,6 +1346,22 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
}
}

// Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)`
// as the pattern to splat the lowest bit.
// FIXME: iff X is already masked, we don't need the one-use check.
Value *X;
if (match(Op1, m_SpecificIntAllowUndef(BitWidth - 1)) &&
match(Op0, m_OneUse(m_Shl(m_Value(X),
m_SpecificIntAllowUndef(BitWidth - 1))))) {
Constant *Mask = ConstantInt::get(Ty, 1);
// Retain the knowledge about the ignored lanes.
Mask = Constant::mergeUndefsWith(
Constant::mergeUndefsWith(Mask, cast<Constant>(Op1)),
cast<Constant>(cast<Instruction>(Op0)->getOperand(1)));
X = Builder.CreateAnd(X, Mask);
return BinaryOperator::CreateNeg(X);
}

if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I))
return R;

@@ -1354,7 +1370,6 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
return BinaryOperator::CreateLShr(Op0, Op1);

// ashr (xor %x, -1), %y --> xor (ashr %x, %y), -1
Value *X;
if (match(Op0, m_OneUse(m_Not(m_Value(X))))) {
// Note that we must drop 'exact'-ness of the shift!
// Note that we can't keep undef's in -1 vector constant!
8 changes: 4 additions & 4 deletions llvm/test/Transforms/InstCombine/exact.ll
@@ -147,8 +147,8 @@ define <2 x i1> @ashr_icmp2_vec(<2 x i64> %X) {
; Make sure we don't transform the ashr here into an sdiv
define i1 @pr9998(i32 %V) {
; CHECK-LABEL: @pr9998(
; CHECK-NEXT: [[W_MASK:%.*]] = and i32 [[V:%.*]], 1
; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[W_MASK]], 0
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[V:%.*]], 1
; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[Z]]
;
%W = shl i32 %V, 31
@@ -161,8 +161,8 @@ define i1 @pr9998(i32 %V) {
; FIXME: Vectors should fold the same way.
define <2 x i1> @pr9998vec(<2 x i32> %V) {
; CHECK-LABEL: @pr9998vec(
; CHECK-NEXT: [[W:%.*]] = shl <2 x i32> [[V:%.*]], <i32 31, i32 31>
; CHECK-NEXT: [[X:%.*]] = ashr exact <2 x i32> [[W]], <i32 31, i32 31>
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[V:%.*]], <i32 1, i32 1>
; CHECK-NEXT: [[X:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP1]]
; CHECK-NEXT: [[Y:%.*]] = sext <2 x i32> [[X]] to <2 x i64>
; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i64> [[Y]], <i64 7297771788697658747, i64 7297771788697658747>
; CHECK-NEXT: ret <2 x i1> [[Z]]
29 changes: 15 additions & 14 deletions llvm/test/Transforms/InstCombine/low-bit-splat.ll
@@ -9,8 +9,8 @@ declare void @use8(i8)
; Basic positive scalar tests
define i8 @t0(i8 %x) {
; CHECK-LABEL: @t0(
; CHECK-NEXT: [[I0:%.*]] = shl i8 [[X:%.*]], 7
; CHECK-NEXT: [[R:%.*]] = ashr exact i8 [[I0]], 7
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = sub nsw i8 0, [[TMP1]]
; CHECK-NEXT: ret i8 [[R]]
;
%i0 = shl i8 %x, 7
@@ -19,8 +19,8 @@ define i8 @t0(i8 %x) {
}
define i16 @t1_otherbitwidth(i16 %x) {
; CHECK-LABEL: @t1_otherbitwidth(
; CHECK-NEXT: [[I0:%.*]] = shl i16 [[X:%.*]], 15
; CHECK-NEXT: [[R:%.*]] = ashr exact i16 [[I0]], 15
; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = sub nsw i16 0, [[TMP1]]
; CHECK-NEXT: ret i16 [[R]]
;
%i0 = shl i16 %x, 15
@@ -31,8 +31,8 @@ define i16 @t1_otherbitwidth(i16 %x) {
; Basic positive vector tests
define <2 x i8> @t2_vec(<2 x i8> %x) {
; CHECK-LABEL: @t2_vec(
; CHECK-NEXT: [[I0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 7, i8 7>
; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i8> [[I0]], <i8 7, i8 7>
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 1, i8 1>
; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> zeroinitializer, [[TMP1]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%i0 = shl <2 x i8> %x, <i8 7, i8 7>
@@ -41,8 +41,8 @@ define <2 x i8> @t2_vec(<2 x i8> %x) {
}
define <3 x i8> @t3_vec_undef0(<3 x i8> %x) {
; CHECK-LABEL: @t3_vec_undef0(
; CHECK-NEXT: [[I0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 7, i8 undef, i8 7>
; CHECK-NEXT: [[R:%.*]] = ashr <3 x i8> [[I0]], <i8 7, i8 7, i8 7>
; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i8> [[X:%.*]], <i8 1, i8 undef, i8 1>
; CHECK-NEXT: [[R:%.*]] = sub <3 x i8> zeroinitializer, [[TMP1]]
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%i0 = shl <3 x i8> %x, <i8 7, i8 undef, i8 7>
@@ -51,8 +51,8 @@ define <3 x i8> @t3_vec_undef0(<3 x i8> %x) {
}
define <3 x i8> @t4_vec_undef1(<3 x i8> %x) {
; CHECK-LABEL: @t4_vec_undef1(
; CHECK-NEXT: [[I0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 7, i8 7, i8 7>
; CHECK-NEXT: [[R:%.*]] = ashr <3 x i8> [[I0]], <i8 7, i8 undef, i8 7>
; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i8> [[X:%.*]], <i8 1, i8 undef, i8 1>
; CHECK-NEXT: [[R:%.*]] = sub <3 x i8> zeroinitializer, [[TMP1]]
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%i0 = shl <3 x i8> %x, <i8 7, i8 7, i8 7>
@@ -61,8 +61,8 @@ define <3 x i8> @t4_vec_undef1(<3 x i8> %x) {
}
define <3 x i8> @t5_vec_undef2(<3 x i8> %x) {
; CHECK-LABEL: @t5_vec_undef2(
; CHECK-NEXT: [[I0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 7, i8 undef, i8 7>
; CHECK-NEXT: [[R:%.*]] = ashr <3 x i8> [[I0]], <i8 7, i8 undef, i8 7>
; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i8> [[X:%.*]], <i8 1, i8 undef, i8 1>
; CHECK-NEXT: [[R:%.*]] = sub <3 x i8> zeroinitializer, [[TMP1]]
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%i0 = shl <3 x i8> %x, <i8 7, i8 undef, i8 7>
@@ -89,8 +89,8 @@ define i8 @t7_already_masked(i8 %x) {
; CHECK-LABEL: @t7_already_masked(
; CHECK-NEXT: [[I0:%.*]] = and i8 [[X:%.*]], 1
; CHECK-NEXT: call void @use8(i8 [[I0]])
; CHECK-NEXT: [[I1:%.*]] = shl i8 [[X]], 7
; CHECK-NEXT: [[R:%.*]] = ashr exact i8 [[I1]], 7
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], 1
; CHECK-NEXT: [[R:%.*]] = sub nsw i8 0, [[TMP1]]
; CHECK-NEXT: ret i8 [[R]]
;
%i0 = and i8 %x, 1
@@ -99,6 +99,7 @@ define i8 @t7_already_masked(i8 %x) {
%r = ashr i8 %i1, 7
ret i8 %r
}
; FIXME: we should fold this
define i8 @t8_already_masked_extrause(i8 %x) {
; CHECK-LABEL: @t8_already_masked_extrause(
; CHECK-NEXT: [[I0:%.*]] = and i8 [[X:%.*]], 1
6 changes: 4 additions & 2 deletions llvm/test/Transforms/InstCombine/sext.ll
@@ -306,8 +306,10 @@ define i32 @test18(i16 %x) {

define i10 @test19(i10 %i) {
; CHECK-LABEL: @test19(
; CHECK-NEXT: [[D1:%.*]] = shl i10 [[I:%.*]], 9
; CHECK-NEXT: [[D:%.*]] = ashr exact i10 [[D1]], 9
; CHECK-NEXT: [[A:%.*]] = trunc i10 [[I:%.*]] to i3
; CHECK-NEXT: [[TMP1:%.*]] = and i3 [[A]], 1
; CHECK-NEXT: [[C:%.*]] = sub nsw i3 0, [[TMP1]]
; CHECK-NEXT: [[D:%.*]] = sext i3 [[C]] to i10
; CHECK-NEXT: ret i10 [[D]]
;
%a = trunc i10 %i to i3
