diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 9c6624f86f135..c77b4955dfd2e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1129,6 +1129,35 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
   return nullptr;
 }
 
+/// Try to reduce signed division by power-of-2 to an arithmetic shift right.
+static Instruction *foldAddToAshr(BinaryOperator &Add) {
+  // Division must be by power-of-2, but not the minimum signed value.
+  Value *X;
+  const APInt *DivC;
+  if (!match(Add.getOperand(0), m_SDiv(m_Value(X), m_Power2(DivC))) ||
+      DivC->isNegative())
+    return nullptr;
+
+  // Rounding is done by adding -1 if the dividend (X) is negative and has any
+  // low bits set. The canonical pattern for that is an "ugt" compare with
+  // SMIN: sext (icmp ugt (X & (SMin | (DivC - 1))), SMin)
+  const APInt *MaskC;
+  ICmpInst::Predicate Pred;
+  if (!match(Add.getOperand(1),
+             m_SExt(m_ICmp(Pred, m_And(m_Specific(X), m_APInt(MaskC)),
+                           m_SignMask()))) ||
+      Pred != ICmpInst::ICMP_UGT)
+    return nullptr;
+
+  APInt SMin = APInt::getSignedMinValue(Add.getType()->getScalarSizeInBits());
+  if (*MaskC != (SMin | (*DivC - 1)))
+    return nullptr;
+
+  // (X / DivC) + sext ((X & (SMin | (DivC - 1))) >u SMin) --> X >>s log2(DivC)
+  return BinaryOperator::CreateAShr(
+      X, ConstantInt::get(Add.getType(), DivC->exactLogBase2()));
+}
+
 Instruction *InstCombinerImpl::
 canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
     BinaryOperator &I) {
@@ -1484,6 +1513,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
     return BinaryOperator::CreateSub(B, Shl);
   }
 
+  if (Instruction *Ashr = foldAddToAshr(I))
+    return Ashr;
+
   // TODO(jingyue): Consider willNotOverflowSignedAdd and
   // willNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.
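Reviewer note: the fold relies on the identity that truncating signed division by 2^K, corrected by -1 whenever the dividend is negative and has any of the low K bits set, equals an arithmetic shift right by K. Below is a minimal standalone C++ sanity check of that identity, not part of the patch; the helper names preFold/postFold are hypothetical, and it brute-forces all i16 values:

```cpp
// Standalone sanity check (not part of the patch): brute-forces the identity
//   (X sdiv 2^K) + sext((X & (SMin | (2^K - 1))) u> SMin)  ==  X ashr K
// over all i16 values and all valid power-of-2 divisors.
#include <cassert>
#include <cstdint>

// Reference: the pre-fold IR sequence, evaluated in C++.
static int16_t preFold(int16_t X, unsigned K) {
  int16_t DivC = int16_t(1) << K;         // power-of-2 divisor, never SMin
  uint16_t SMin = 0x8000;                 // signed minimum as a bit mask
  uint16_t Mask = SMin | uint16_t(DivC - 1);
  int16_t D = int16_t(X / DivC);          // sdiv: rounds toward zero
  // icmp ugt (and X, Mask), SMin --> true iff X is negative AND some low
  // bit is set; sext i1 --> -1 or 0.
  int16_t S = (uint16_t(X) & Mask) > SMin ? -1 : 0;
  return int16_t(D + S);
}

// The folded form: arithmetic shift right rounds toward negative infinity.
static int16_t postFold(int16_t X, unsigned K) {
  return int16_t(X >> K); // arithmetic shift for signed X (guaranteed from
                          // C++20; in practice on all mainstream compilers)
}

int main() {
  for (unsigned K = 0; K < 15; ++K)       // DivC = 1 .. 16384, never SMin
    for (int32_t X = INT16_MIN; X <= INT16_MAX; ++X)
      assert(preFold(int16_t(X), K) == postFold(int16_t(X), K));
}
```

The "ugt SMin" compare works because the masked value can exceed SMin (as an unsigned number) only when the sign bit and at least one low bit are both set, which is exactly the case where truncating division must be adjusted by -1 to match floor division.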
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 7002009540eab..5bc34deb76061 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -2462,11 +2462,7 @@ define i9 @sext_zext_not_commute(i4 %x) {
 
 define i32 @floor_sdiv(i32 %x) {
 ; CHECK-LABEL: @floor_sdiv(
-; CHECK-NEXT:    [[D:%.*]] = sdiv i32 [[X:%.*]], 4
-; CHECK-NEXT:    [[A:%.*]] = and i32 [[X]], -2147483645
-; CHECK-NEXT:    [[I:%.*]] = icmp ugt i32 [[A]], -2147483648
-; CHECK-NEXT:    [[S:%.*]] = sext i1 [[I]] to i32
-; CHECK-NEXT:    [[R:%.*]] = add nsw i32 [[D]], [[S]]
+; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[X:%.*]], 2
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %d = sdiv i32 %x, 4
@@ -2477,13 +2473,11 @@ define i32 @floor_sdiv(i32 %x) {
   ret i32 %r
 }
 
+; vectors work too and commute is handled by complexity-based canonicalization
+
 define <2 x i32> @floor_sdiv_vec_commute(<2 x i32> %x) {
 ; CHECK-LABEL: @floor_sdiv_vec_commute(
-; CHECK-NEXT:    [[D:%.*]] = sdiv <2 x i32> [[X:%.*]], <i32 4, i32 4>
-; CHECK-NEXT:    [[A:%.*]] = and <2 x i32> [[X]], <i32 -2147483645, i32 -2147483645>
-; CHECK-NEXT:    [[I:%.*]] = icmp ugt <2 x i32> [[A]], <i32 -2147483648, i32 -2147483648>
-; CHECK-NEXT:    [[S:%.*]] = sext <2 x i1> [[I]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i32> [[D]], [[S]]
+; CHECK-NEXT:    [[R:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 2, i32 2>
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %d = sdiv <2 x i32> %x, <i32 4, i32 4>
@@ -2494,6 +2488,8 @@ define <2 x i32> @floor_sdiv_vec_commute(<2 x i32> %x) {
   ret <2 x i32> %r
 }
 
+; extra uses are ok
+
 define i8 @floor_sdiv_uses(i8 %x) {
 ; CHECK-LABEL: @floor_sdiv_uses(
 ; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[X:%.*]], 16
@@ -2503,7 +2499,7 @@ define i8 @floor_sdiv_uses(i8 %x) {
 ; CHECK-NEXT:    [[I:%.*]] = icmp ugt i8 [[A]], -128
 ; CHECK-NEXT:    [[S:%.*]] = sext i1 [[I]] to i8
 ; CHECK-NEXT:    call void @use(i8 [[S]])
-; CHECK-NEXT:    [[R:%.*]] = add nsw i8 [[D]], [[S]]
+; CHECK-NEXT:    [[R:%.*]] = ashr i8 [[X]], 4
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %d = sdiv i8 %x, 16
@@ -2517,6 +2513,8 @@ define i8 @floor_sdiv_uses(i8 %x) {
   ret i8 %r
 }
 
+; negative test
+
 define i32 @floor_sdiv_wrong_div(i32 %x) {
 ; CHECK-LABEL: @floor_sdiv_wrong_div(
 ; CHECK-NEXT:    [[D:%.*]] = sdiv i32 [[X:%.*]], 8
@@ -2534,6 +2532,8 @@ define i32 @floor_sdiv_wrong_div(i32 %x) {
   ret i32 %r
 }
 
+; negative test
+
 define i32 @floor_sdiv_wrong_mask(i32 %x) {
 ; CHECK-LABEL: @floor_sdiv_wrong_mask(
 ; CHECK-NEXT:    [[D:%.*]] = sdiv i32 [[X:%.*]], 4
@@ -2551,6 +2551,8 @@ define i32 @floor_sdiv_wrong_mask(i32 %x) {
   ret i32 %r
 }
 
+; negative test
+
 define i32 @floor_sdiv_wrong_cmp(i32 %x) {
 ; CHECK-LABEL: @floor_sdiv_wrong_cmp(
 ; CHECK-NEXT:    [[D:%.*]] = sdiv i32 [[X:%.*]], 4
@@ -2568,6 +2570,8 @@ define i32 @floor_sdiv_wrong_cmp(i32 %x) {
   ret i32 %r
 }
 
+; negative test
+
 define i32 @floor_sdiv_wrong_ext(i32 %x) {
 ; CHECK-LABEL: @floor_sdiv_wrong_ext(
 ; CHECK-NEXT:    [[D:%.*]] = sdiv i32 [[X:%.*]], 4
@@ -2585,6 +2589,8 @@ define i32 @floor_sdiv_wrong_ext(i32 %x) {
   ret i32 %r
 }
 
+; negative test
+
 define i32 @floor_sdiv_wrong_op(i32 %x, i32 %y) {
 ; CHECK-LABEL: @floor_sdiv_wrong_op(
 ; CHECK-NEXT:    [[D:%.*]] = sdiv i32 [[X:%.*]], 4
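A quick C++ illustration of what the negative tests guard against; the constants here are illustrative and not taken from the truncated test bodies above. If the mask loses the sign bit, the icmp can never be true, so the expression is plain truncating division, which disagrees with ashr for negative dividends:

```cpp
// Sketch of the "wrong mask" failure mode: with the sign bit missing from
// the mask, the sext term is always 0, so the pattern is NOT floor division
// and must not be folded to ashr.
#include <cstdint>
#include <cstdio>

int main() {
  int32_t X = -5;
  uint32_t SMin = 0x80000000u;
  uint32_t WrongMask = 3;                       // DivC - 1, sign bit missing
  // ugt SMin can only hold if the sign bit survives the mask; it never does
  // here, so S is always 0.
  int32_t S = (uint32_t(X) & WrongMask) > SMin ? -1 : 0;
  std::printf("%d vs %d\n", X / 4 + S, X >> 2); // prints "-1 vs -2"
}
```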