From e71870512fd896bf6cf34e8ae650f4cf20923258 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Sat, 7 Aug 2021 15:20:35 +0300
Subject: [PATCH] [InstCombine] Prefer `-(x & 1)` as the low bit splatting pattern (PR51305)

Both patterns are equivalent (https://alive2.llvm.org/ce/z/jfCViF),
so we should have a preference. It seems like mask+negation is better
than two shifts.
---
 .../InstCombine/InstCombineShifts.cpp       | 17 ++++++++++-
 llvm/test/Transforms/InstCombine/exact.ll   |  8 ++---
 .../Transforms/InstCombine/low-bit-splat.ll | 29 ++++++++++---------
 llvm/test/Transforms/InstCombine/sext.ll    |  6 ++--
 4 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index ca5e473fdecba..01f8e60db7e78 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1346,6 +1346,22 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
     }
   }
 
+  // Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)`
+  // as the pattern to splat the lowest bit.
+  // FIXME: iff X is already masked, we don't need the one-use check.
+  Value *X;
+  if (match(Op1, m_SpecificIntAllowUndef(BitWidth - 1)) &&
+      match(Op0, m_OneUse(m_Shl(m_Value(X),
+                                m_SpecificIntAllowUndef(BitWidth - 1))))) {
+    Constant *Mask = ConstantInt::get(Ty, 1);
+    // Retain the knowledge about the ignored lanes.
+    Mask = Constant::mergeUndefsWith(
+        Constant::mergeUndefsWith(Mask, cast<Constant>(Op1)),
+        cast<Constant>(cast<Instruction>(Op0)->getOperand(1)));
+    X = Builder.CreateAnd(X, Mask);
+    return BinaryOperator::CreateNeg(X);
+  }
+
   if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I))
     return R;
 
@@ -1354,7 +1370,6 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
     return BinaryOperator::CreateLShr(Op0, Op1);
 
   // ashr (xor %x, -1), %y --> xor (ashr %x, %y), -1
-  Value *X;
   if (match(Op0, m_OneUse(m_Not(m_Value(X))))) {
     // Note that we must drop 'exact'-ness of the shift!
     // Note that we can't keep undef's in -1 vector constant!
diff --git a/llvm/test/Transforms/InstCombine/exact.ll b/llvm/test/Transforms/InstCombine/exact.ll
index 6b52dfb0380ea..e87e12cc1bea1 100644
--- a/llvm/test/Transforms/InstCombine/exact.ll
+++ b/llvm/test/Transforms/InstCombine/exact.ll
@@ -147,8 +147,8 @@ define <2 x i1> @ashr_icmp2_vec(<2 x i64> %X) {
 ; Make sure we don't transform the ashr here into an sdiv
 define i1 @pr9998(i32 %V) {
 ; CHECK-LABEL: @pr9998(
-; CHECK-NEXT:    [[W_MASK:%.*]] = and i32 [[V:%.*]], 1
-; CHECK-NEXT:    [[Z:%.*]] = icmp ne i32 [[W_MASK]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[Z:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[Z]]
 ;
   %W = shl i32 %V, 31
@@ -161,8 +161,8 @@ define i1 @pr9998(i32 %V) {
 ; FIXME: Vectors should fold the same way.
 define <2 x i1> @pr9998vec(<2 x i32> %V) {
 ; CHECK-LABEL: @pr9998vec(
-; CHECK-NEXT:    [[W:%.*]] = shl <2 x i32> [[V:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[X:%.*]] = ashr exact <2 x i32> [[W]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[X:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    [[Y:%.*]] = sext <2 x i32> [[X]] to <2 x i64>
 ; CHECK-NEXT:    [[Z:%.*]] = icmp ugt <2 x i64> [[Y]],
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
diff --git a/llvm/test/Transforms/InstCombine/low-bit-splat.ll b/llvm/test/Transforms/InstCombine/low-bit-splat.ll
index ce891f7d03f1e..332cb32ad1de6 100644
--- a/llvm/test/Transforms/InstCombine/low-bit-splat.ll
+++ b/llvm/test/Transforms/InstCombine/low-bit-splat.ll
@@ -9,8 +9,8 @@ declare void @use8(i8)
 ; Basic positive scalar tests
 define i8 @t0(i8 %x) {
 ; CHECK-LABEL: @t0(
-; CHECK-NEXT:    [[I0:%.*]] = shl i8 [[X:%.*]], 7
-; CHECK-NEXT:    [[R:%.*]] = ashr exact i8 [[I0]], 7
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[R:%.*]] = sub nsw i8 0, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %i0 = shl i8 %x, 7
@@ -19,8 +19,8 @@ define i8 @t0(i8 %x) {
 }
 define i16 @t1_otherbitwidth(i16 %x) {
 ; CHECK-LABEL: @t1_otherbitwidth(
-; CHECK-NEXT:    [[I0:%.*]] = shl i16 [[X:%.*]], 15
-; CHECK-NEXT:    [[R:%.*]] = ashr exact i16 [[I0]], 15
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 [[X:%.*]], 1
+; CHECK-NEXT:    [[R:%.*]] = sub nsw i16 0, [[TMP1]]
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %i0 = shl i16 %x, 15
@@ -31,8 +31,8 @@ define i16 @t1_otherbitwidth(i16 %x) {
 ; Basic positive vector tests
 define <2 x i8> @t2_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @t2_vec(
-; CHECK-NEXT:    [[I0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 7, i8 7>
-; CHECK-NEXT:    [[R:%.*]] = ashr exact <2 x i8> [[I0]], <i8 7, i8 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %i0 = shl <2 x i8> %x, <i8 7, i8 7>
@@ -41,8 +41,8 @@ define <2 x i8> @t2_vec(<2 x i8> %x) {
 }
 define <3 x i8> @t3_vec_undef0(<3 x i8> %x) {
 ; CHECK-LABEL: @t3_vec_undef0(
-; CHECK-NEXT:    [[I0:%.*]] = shl <3 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[R:%.*]] = ashr <3 x i8> [[I0]],
+; CHECK-NEXT:    [[TMP1:%.*]] = and <3 x i8> [[X:%.*]],
+; CHECK-NEXT:    [[R:%.*]] = sub <3 x i8> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
   %i0 = shl <3 x i8> %x,
@@ -51,8 +51,8 @@ define <3 x i8> @t3_vec_undef0(<3 x i8> %x) {
 }
 define <3 x i8> @t4_vec_undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @t4_vec_undef1(
-; CHECK-NEXT:    [[I0:%.*]] = shl <3 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[R:%.*]] = ashr <3 x i8> [[I0]],
+; CHECK-NEXT:    [[TMP1:%.*]] = and <3 x i8> [[X:%.*]],
+; CHECK-NEXT:    [[R:%.*]] = sub <3 x i8> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
   %i0 = shl <3 x i8> %x,
@@ -61,8 +61,8 @@ define <3 x i8> @t4_vec_undef1(<3 x i8> %x) {
 }
 define <3 x i8> @t5_vec_undef2(<3 x i8> %x) {
 ; CHECK-LABEL: @t5_vec_undef2(
-; CHECK-NEXT:    [[I0:%.*]] = shl <3 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[R:%.*]] = ashr <3 x i8> [[I0]],
+; CHECK-NEXT:    [[TMP1:%.*]] = and <3 x i8> [[X:%.*]],
+; CHECK-NEXT:    [[R:%.*]] = sub <3 x i8> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
   %i0 = shl <3 x i8> %x,
@@ -89,8 +89,8 @@ define i8 @t7_already_masked(i8 %x) {
 ; CHECK-LABEL: @t7_already_masked(
 ; CHECK-NEXT:    [[I0:%.*]] = and i8 [[X:%.*]], 1
 ; CHECK-NEXT:    call void @use8(i8 [[I0]])
-; CHECK-NEXT:    [[I1:%.*]] = shl i8 [[X]], 7
-; CHECK-NEXT:    [[R:%.*]] = ashr exact i8 [[I1]], 7
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X]], 1
+; CHECK-NEXT:    [[R:%.*]] = sub nsw i8 0, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %i0 = and i8 %x, 1
@@ -99,6 +99,7 @@ define i8 @t7_already_masked(i8 %x) {
   %r = ashr i8 %i1, 7
   ret i8 %r
 }
+; FIXME: we should fold this
 define i8 @t8_already_masked_extrause(i8 %x) {
 ; CHECK-LABEL: @t8_already_masked_extrause(
 ; CHECK-NEXT:    [[I0:%.*]] = and i8 [[X:%.*]], 1
diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll
index 04573e2e8ddca..1fc645cc9289a 100644
--- a/llvm/test/Transforms/InstCombine/sext.ll
+++ b/llvm/test/Transforms/InstCombine/sext.ll
@@ -306,8 +306,10 @@ define i32 @test18(i16 %x) {
 
 define i10 @test19(i10 %i) {
 ; CHECK-LABEL: @test19(
-; CHECK-NEXT:    [[D1:%.*]] = shl i10 [[I:%.*]], 9
-; CHECK-NEXT:    [[D:%.*]] = ashr exact i10 [[D1]], 9
+; CHECK-NEXT:    [[A:%.*]] = trunc i10 [[I:%.*]] to i3
+; CHECK-NEXT:    [[TMP1:%.*]] = and i3 [[A]], 1
+; CHECK-NEXT:    [[C:%.*]] = sub nsw i3 0, [[TMP1]]
+; CHECK-NEXT:    [[D:%.*]] = sext i3 [[C]] to i10
 ; CHECK-NEXT:    ret i10 [[D]]
 ;
   %a = trunc i10 %i to i3
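
For reference, a minimal standalone IR sketch of the rewrite this patch teaches visitAShr; the @src/@tgt function names are illustrative only (they are not taken from the patch), and the pair can be checked with the alive2 link in the commit message or by running @src through `opt -passes=instcombine`:

; @src: old canonical form, splat the low bit of %x via shl/ashr by bitwidth-1.
define i8 @src(i8 %x) {
  %s = shl i8 %x, 7
  %r = ashr i8 %s, 7
  ret i8 %r
}

; @tgt: new preferred form, mask out the low bit and negate it.
define i8 @tgt(i8 %x) {
  %m = and i8 %x, 1
  %r = sub i8 0, %m
  ret i8 %r
}

Both functions return 0 when the low bit of %x is clear and -1 (all bits set) when it is set, which is why the two patterns are interchangeable and InstCombine can simply prefer the cheaper mask+negate form.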