From ee8f7a53db2bc56e85d7e63a31228cf7e7453835 Mon Sep 17 00:00:00 2001
From: Alex Gaynor
Date: Wed, 4 Jun 2025 18:34:08 -0400
Subject: [PATCH 1/7] InstCombine: improve optimizations for ceiling division
 with no overflow

fixes #142497
---
 .../InstCombine/InstCombineAddSub.cpp   |  44 ++++++
 llvm/test/Transforms/InstCombine/add.ll | 157 ++++++++++++++++++
 2 files changed, 201 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index a9ac5ff9b9c89..16ebd7bceff63 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1787,6 +1787,50 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
   if (Instruction *Ashr = foldAddToAshr(I))
     return Ashr;
 
+  // Ceiling division by power-of-2:
+  // (X >> log2(N)) + zext(X & (N-1) != 0) --> (X + (N-1)) >> log2(N)
+  // This is valid when adding (N-1) to X doesn't overflow.
+  {
+    Value *X = nullptr, *Cmp = nullptr;
+    const APInt *ShiftAmt = nullptr, *Mask = nullptr;
+    CmpPredicate Pred;
+
+    // Match: (X >> C) + zext((X & Mask) != 0)
+    // or: zext((X & Mask) != 0) + (X >> C)
+    Value *Op0 = I.getOperand(0);
+    Value *Op1 = I.getOperand(1);
+
+    // Try matching with shift on left, zext on right
+    bool Matched = false;
+    if (match(Op0, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
+        match(Op1, m_ZExt(m_Value(Cmp)))) {
+      Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
+                                  m_ZeroInt()));
+    } else if (match(Op1, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
+               match(Op0, m_ZExt(m_Value(Cmp)))) {
+      Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
+                                  m_ZeroInt()));
+    }
+
+    if (Matched &&
+        Pred == ICmpInst::ICMP_NE &&
+        ShiftAmt && ShiftAmt->uge(1) && ShiftAmt->ult(BitWidth) &&
+        Mask && *Mask == (APInt(BitWidth, 1) << *ShiftAmt) - 1) {
+
+      // Check if X + Mask doesn't overflow
+      Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
+      bool WillNotOverflowUnsigned = willNotOverflowUnsignedAdd(X, MaskC, I);
+
+      if (WillNotOverflowUnsigned) {
+        // (X + Mask) >> ShiftAmt
+        bool WillNotOverflowSigned = willNotOverflowSignedAdd(X, MaskC, I);
+        Value *Add = Builder.CreateAdd(X, MaskC, "", WillNotOverflowUnsigned,
+                                       WillNotOverflowSigned);
+        return BinaryOperator::CreateLShr(Add, ConstantInt::get(X->getType(), *ShiftAmt));
+      }
+    }
+  }
+
   // (~X) + (~Y) --> -2 - (X + Y)
   {
     // To ensure we can save instructions we need to ensure that we consume both
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 495f99824652d..d364082eab317 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -4273,4 +4273,161 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) {
 }
 
 declare void @llvm.assume(i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+; Ceiling division by power-of-2: (x >> log2(N)) + ((x & (N-1)) != 0) -> (x + (N-1)) >> log2(N)
+; This is only valid when x + (N-1) doesn't overflow
+
+; Test with known range that prevents overflow
+define noundef range(i32 0, 100) i32 @ceil_div_by_8_known_range(i32 noundef range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_by_8_known_range(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
+; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[TMP1]], 3
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 3
+  %and = and i32 %x, 7
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  %r = add i32 %shr, %ext
+  ret i32 %r
+}
+
+; Test with the exact IR from the original testcase
+define noundef range(i32 0, 6) i32 @ceil_div_from_clz(i32 noundef %v) {
+; CHECK-LABEL: @ceil_div_from_clz(
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nuw nsw i32 39, [[CTLZ]]
+; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[TMP1]], 3
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %ctlz = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %v, i1 false)
+  %sub = sub nuw nsw i32 32, %ctlz
+  %shr = lshr i32 %sub, 3
+  %and = and i32 %sub, 7
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  %r = add nuw nsw i32 %shr, %ext
+  ret i32 %r
+}
+
+; Vector version with known range
+define <2 x i32> @ceil_div_by_8_vec_range(<2 x i32> range(i32 0, 1000) %x) {
+; CHECK-LABEL: @ceil_div_by_8_vec_range(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <2 x i32> [[X:%.*]], splat (i32 7)
+; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3)
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %shr = lshr <2 x i32> %x, <i32 3, i32 3>
+  %and = and <2 x i32> %x, <i32 7, i32 7>
+  %cmp = icmp ne <2 x i32> %and, <i32 0, i32 0>
+  %ext = zext <2 x i1> %cmp to <2 x i32>
+  %r = add <2 x i32> %shr, %ext
+  ret <2 x i32> %r
+}
+
+; Ceiling division by 16 with known range
+define i16 @ceil_div_by_16_i16(i16 range(i16 0, 1000) %x) {
+; CHECK-LABEL: @ceil_div_by_16_i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i16 [[X:%.*]], 15
+; CHECK-NEXT:    [[R:%.*]] = lshr i16 [[TMP1]], 4
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %shr = lshr i16 %x, 4
+  %and = and i16 %x, 15
+  %cmp = icmp ne i16 %and, 0
+  %ext = zext i1 %cmp to i16
+  %r = add i16 %shr, %ext
+  ret i16 %r
+}
+
+; Negative test: no overflow guarantee - should NOT optimize
+define i32 @ceil_div_by_8_no_overflow_info(i32 %x) {
+; CHECK-LABEL: @ceil_div_by_8_no_overflow_info(
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 3
+  %and = and i32 %x, 7
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  %r = add i32 %shr, %ext
+  ret i32 %r
+}
+
+; Negative test: nuw on final add doesn't help
+define i32 @ceil_div_by_8_only_nuw_on_add(i32 %x) {
+; CHECK-LABEL: @ceil_div_by_8_only_nuw_on_add(
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 3
+  %and = and i32 %x, 7
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  %r = add nuw i32 %shr, %ext ; nuw here doesn't prove x+7 won't overflow
+  ret i32 %r
+}
+
+; Negative test: wrong mask
+define i32 @ceil_div_wrong_mask(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_wrong_mask(
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 6
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 3
+  %and = and i32 %x, 6 ; Wrong mask: should be 7
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  %r = add i32 %shr, %ext
+  ret i32 %r
+}
+
+; Negative test: wrong shift amount
+define i32 @ceil_div_wrong_shift(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_wrong_shift(
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 4
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 4 ; Shift by 4, but mask is 7 (should be 15)
+  %and = and i32 %x, 7
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  %r = add i32 %shr, %ext
+  ret i32 %r
+}
+
+; Negative test: wrong comparison
+define i32 @ceil_div_wrong_cmp(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_wrong_cmp(
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 3
+  %and = and i32 %x, 7
+  %cmp = icmp eq i32 %and, 0 ; Wrong: should be ne
+  %ext = zext i1 %cmp to i32
+  %r = add i32 %shr, %ext
+  ret i32 %r
+}
 declare void @fake_func(i32)
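
A quick way to sanity-check the fold outside of LLVM (a minimal standalone sketch, not part of the patch): writing x = 8q + r with 0 <= r < 8, the original form is q + (r != 0) and the rewritten form is q + (r + 7)/8, and the two agree as long as x + 7 is computed exactly. The loop below confirms this on every 8-bit value and shows precisely where the rewrite breaks once the add wraps, which is why the fold requires the no-overflow proof that the negative tests exercise:

    // ceil_div_check.cpp: exhaustive check of the i8 version of the fold.
    #include <cstdint>
    #include <cstdio>

    int main() {
      for (uint32_t x = 0; x <= 0xFF; ++x) {
        uint8_t v = static_cast<uint8_t>(x);
        uint8_t ceil_form = (v >> 3) + ((v & 7) != 0 ? 1 : 0);
        uint8_t add_form = static_cast<uint8_t>(v + 7) >> 3;  // wraps for v > 248
        if (ceil_form != add_form)
          std::printf("diverges at v=%u: %u vs %u\n", x, ceil_form, add_form);
      }
      return 0;
    }

The two forms diverge exactly for v in [249, 255], i.e. exactly when v + 7 overflows the type.
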
From 2b55fe227d63472a27f6572fc26695e714ae72f4 Mon Sep 17 00:00:00 2001
From: Alex Gaynor
Date: Thu, 5 Jun 2025 20:49:07 -0400
Subject: [PATCH 2/7] review feedback: make use of m_c_Add and m_LowBitMask
 helpers

---
 .../InstCombine/InstCombineAddSub.cpp | 25 ++++++-------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 16ebd7bceff63..a2f89708009f7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1791,31 +1791,20 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
   // (X >> log2(N)) + zext(X & (N-1) != 0) --> (X + (N-1)) >> log2(N)
   // This is valid when adding (N-1) to X doesn't overflow.
   {
-    Value *X = nullptr, *Cmp = nullptr;
+    Value *X = nullptr, *Cmp = nullptr, *Shift = nullptr;
     const APInt *ShiftAmt = nullptr, *Mask = nullptr;
     CmpPredicate Pred;
 
     // Match: (X >> C) + zext((X & Mask) != 0)
     // or: zext((X & Mask) != 0) + (X >> C)
-    Value *Op0 = I.getOperand(0);
-    Value *Op1 = I.getOperand(1);
-
-    // Try matching with shift on left, zext on right
-    bool Matched = false;
-    if (match(Op0, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
-        match(Op1, m_ZExt(m_Value(Cmp)))) {
-      Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
-                                  m_ZeroInt()));
-    } else if (match(Op1, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
-               match(Op0, m_ZExt(m_Value(Cmp)))) {
-      Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
-                                  m_ZeroInt()));
-    }
-
-    if (Matched &&
+    if (match(&I, m_c_Add(m_Value(Shift), m_ZExt(m_Value(Cmp)))) &&
+        match(Shift, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
+        Shift->hasOneUse() &&
+        match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_LowBitMask(Mask)),
+                          m_ZeroInt())) &&
         Pred == ICmpInst::ICMP_NE &&
         ShiftAmt && ShiftAmt->uge(1) && ShiftAmt->ult(BitWidth) &&
-        Mask && *Mask == (APInt(BitWidth, 1) << *ShiftAmt) - 1) {
+        Mask && Mask->popcount() == *ShiftAmt) {
 
       // Check if X + Mask doesn't overflow
       Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
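
For readers unfamiliar with the helpers this revision switches to: m_c_Add matches an add with its operands in either order, and m_LowBitMask matches constants of the form 2^k - 1. A plain-C++ restatement of the predicate (an illustration of the concept, with the assumption that it mirrors what the matcher accepts; it is not LLVM's implementation):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Low-bit mask: binary 0...01...1, i.e. 2^k - 1 for some k >= 1.
    bool isLowBitMask(uint64_t m) { return m != 0 && (m & (m + 1)) == 0; }

    int main() {
      assert(isLowBitMask(7) && isLowBitMask(15) && isLowBitMask(1));
      assert(!isLowBitMask(6) && !isLowBitMask(10) && !isLowBitMask(0));
      // The extra guard Mask->popcount() == *ShiftAmt ties the two constants
      // together: popcount(2^k - 1) == k, so the mask must be (1 << shift) - 1.
      assert(std::popcount(uint64_t{7}) == 3);  // pairs with lshr ..., 3
      return 0;
    }

This is what rules out the ceil_div_wrong_mask and ceil_div_wrong_shift tests from patch 1: 6 (0b110) is not a low-bit mask, and popcount(7) is 3, not 4.
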
From 1df936da385fa04b024401937fb45b30c6c32755 Mon Sep 17 00:00:00 2001
From: gaynor-anthropic
Date: Sat, 7 Jun 2025 13:47:17 -0700
Subject: [PATCH 3/7] code review: apply suggestions

Co-authored-by: Yingwei Zheng
---
 .../Transforms/InstCombine/InstCombineAddSub.cpp | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index a2f89708009f7..767acc3d3019e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1797,14 +1797,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
 
     // Match: (X >> C) + zext((X & Mask) != 0)
     // or: zext((X & Mask) != 0) + (X >> C)
-    if (match(&I, m_c_Add(m_Value(Shift), m_ZExt(m_Value(Cmp)))) &&
-        match(Shift, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
-        Shift->hasOneUse() &&
-        match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_LowBitMask(Mask)),
-                          m_ZeroInt())) &&
-        Pred == ICmpInst::ICMP_NE &&
-        ShiftAmt && ShiftAmt->uge(1) && ShiftAmt->ult(BitWidth) &&
-        Mask && Mask->popcount() == *ShiftAmt) {
+    if (match(&I, m_c_Add(m_OneUse(m_LShr(m_Value(X), m_APInt(ShiftAmt))), m_ZExt(m_SpecificICmp(ICmpInst::ICMP_NE, m_And(m_Deferred(X), m_LowBitMask(Mask)),
+                          m_ZeroInt())))) && Mask->popcount() == *ShiftAmt) {
 
       // Check if X + Mask doesn't overflow
       Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
@@ -1812,9 +1806,7 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
 
       if (WillNotOverflowUnsigned) {
         // (X + Mask) >> ShiftAmt
-        bool WillNotOverflowSigned = willNotOverflowSignedAdd(X, MaskC, I);
-        Value *Add = Builder.CreateAdd(X, MaskC, "", WillNotOverflowUnsigned,
-                                       WillNotOverflowSigned);
+        Value *Add = Builder.CreateNUWAdd(X, MaskC);
         return BinaryOperator::CreateLShr(Add, ConstantInt::get(X->getType(), *ShiftAmt));
       }
     }
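
The switch to Builder.CreateNUWAdd is justified because the fold only fires after willNotOverflowUnsignedAdd has proved the add cannot wrap, so the nuw flag is always sound here. The proof in the tests comes from the range(...) annotations on the arguments, and the arithmetic being relied on is just this (a sketch of the reasoning, not InstCombine's implementation):

    #include <cassert>
    #include <cstdint>

    // With x known to lie in [0, hi], x + c cannot wrap iff hi <= UINT32_MAX - c.
    bool addNeverWrapsU32(uint32_t hi, uint32_t c) { return hi <= UINT32_MAX - c; }

    int main() {
      assert(addNeverWrapsU32(99, 7));           // i32 range(0, 100): fold fires
      assert(!addNeverWrapsU32(UINT32_MAX, 7));  // unconstrained i32: no fold
      return 0;
    }
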
From 339675ff867b313d5da9b179893e62a8e08b816e Mon Sep 17 00:00:00 2001
From: Alex Gaynor
Date: Sat, 7 Jun 2025 16:49:39 -0400
Subject: [PATCH 4/7] clang-format

---
 .../Transforms/InstCombine/InstCombineAddSub.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 767acc3d3019e..12994e280f7fb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1791,14 +1791,18 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
   // (X >> log2(N)) + zext(X & (N-1) != 0) --> (X + (N-1)) >> log2(N)
   // This is valid when adding (N-1) to X doesn't overflow.
   {
-    Value *X = nullptr, *Cmp = nullptr, *Shift = nullptr;
+    Value *X = nullptr;
     const APInt *ShiftAmt = nullptr, *Mask = nullptr;
     CmpPredicate Pred;
 
     // Match: (X >> C) + zext((X & Mask) != 0)
     // or: zext((X & Mask) != 0) + (X >> C)
-    if (match(&I, m_c_Add(m_OneUse(m_LShr(m_Value(X), m_APInt(ShiftAmt))), m_ZExt(m_SpecificICmp(ICmpInst::ICMP_NE, m_And(m_Deferred(X), m_LowBitMask(Mask)),
-                          m_ZeroInt())))) && Mask->popcount() == *ShiftAmt) {
+    if (match(&I, m_c_Add(m_OneUse(m_LShr(m_Value(X), m_APInt(ShiftAmt))),
+                          m_ZExt(m_SpecificICmp(
+                              ICmpInst::ICMP_NE,
+                              m_And(m_Deferred(X), m_LowBitMask(Mask)),
+                              m_ZeroInt())))) &&
+        Mask->popcount() == *ShiftAmt) {
 
       // Check if X + Mask doesn't overflow
       Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
@@ -1807,7 +1811,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
       if (WillNotOverflowUnsigned) {
         // (X + Mask) >> ShiftAmt
         Value *Add = Builder.CreateNUWAdd(X, MaskC);
-        return BinaryOperator::CreateLShr(Add, ConstantInt::get(X->getType(), *ShiftAmt));
+        return BinaryOperator::CreateLShr(
+            Add, ConstantInt::get(X->getType(), *ShiftAmt));
       }
     }
   }
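
For context on where this IR shape shows up in practice: the ceil_div_from_clz test in patch 1 is a bit-length-to-byte-length computation. A plausible source-level version is sketched below (an assumption for illustration; bytesNeeded is a made-up name, not code taken from the original report):

    #include <bit>
    #include <cstdint>

    // Bytes needed to hold the significant bits of v: ceil(bit_width(v) / 8).
    uint32_t bytesNeeded(uint32_t v) {
      uint32_t bits = 32 - std::countl_zero(v);   // the "32 - ctlz" in the test
      return bits / 8 + (bits % 8 != 0 ? 1 : 0);  // lowers to lshr/and/icmp/zext/add
    }

    int main() { return bytesNeeded(0x1234) == 2 ? 0 : 1; }

Since bits can only be 0..32, bits + 7 never overflows, which is exactly the information the known-range and ctlz tests encode in IR.
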
From 74fc5e1aed804e783fd20d86e80cd22ade7dde13 Mon Sep 17 00:00:00 2001
From: Alex Gaynor
Date: Sat, 7 Jun 2025 16:58:00 -0400
Subject: [PATCH 5/7] add additional test cases

---
 llvm/test/Transforms/InstCombine/add.ll | 84 +++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index d364082eab317..74c022b07a9a3 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -4430,4 +4430,88 @@ define i32 @ceil_div_wrong_cmp(i32 range(i32 0, 100) %x) {
   %r = add i32 %shr, %ext
   ret i32 %r
 }
+
+; Multi-use test: all intermediate values have uses
+define i32 @ceil_div_multi_use(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_multi_use(
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT:    call void @use_i32(i32 [[SHR]])
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT:    call void @use_i32(i32 [[AND]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    call void @use_i32(i32 [[EXT]])
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 3
+  call void @use_i32(i32 %shr)
+  %and = and i32 %x, 7
+  call void @use_i32(i32 %and)
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  call void @use_i32(i32 %ext)
+  %r = add i32 %shr, %ext
+  ret i32 %r
+}
+
+; Commuted test: add operands are swapped
+define i32 @ceil_div_commuted(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_commuted(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
+; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[TMP1]], 3
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 3
+  %and = and i32 %x, 7
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  %r = add i32 %ext, %shr ; Operands swapped
+  ret i32 %r
+}
+
+; Commuted with multi-use
+define i32 @ceil_div_commuted_multi_use(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_commuted_multi_use(
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT:    call void @use_i32(i32 [[SHR]])
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    call void @use_i32(i32 [[EXT]])
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 3
+  call void @use_i32(i32 %shr)
+  %and = and i32 %x, 7
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  call void @use_i32(i32 %ext)
+  %r = add i32 %ext, %shr ; Operands swapped
+  ret i32 %r
+}
+
+; Multi-use with vector type
+define <2 x i32> @ceil_div_vec_multi_use(<2 x i32> range(i32 0, 1000) %x) {
+; CHECK-LABEL: @ceil_div_vec_multi_use(
+; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 3)
+; CHECK-NEXT:    call void @use_vec(<2 x i32> [[SHR]])
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X]], splat (i32 7)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT:    [[EXT:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i32> [[SHR]], [[EXT]]
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %shr = lshr <2 x i32> %x, <i32 3, i32 3>
+  call void @use_vec(<2 x i32> %shr)
+  %and = and <2 x i32> %x, <i32 7, i32 7>
+  %cmp = icmp ne <2 x i32> %and, <i32 0, i32 0>
+  %ext = zext <2 x i1> %cmp to <2 x i32>
+  %r = add <2 x i32> %shr, %ext
+  ret <2 x i32> %r
+}
+
+declare void @use_i32(i32)
+declare void @use_vec(<2 x i32>)
 declare void @fake_func(i32)
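
These multi-use tests exercise the m_OneUse guard from the earlier revisions: the fold replaces the lshr, so it is only clearly profitable when that shift has no other consumers. The situation is easiest to see in source form (a sketch; use() stands in for any extra consumer of the quotient):

    #include <cstdint>
    #include <cstdio>

    void use(uint32_t v) { std::printf("%u\n", v); }

    uint32_t ceilDiv8(uint32_t x) {  // caller guarantees x < 100
      uint32_t q = x >> 3;
      use(q);  // the extra use keeps x >> 3 alive, so the fold cannot delete it
      return q + ((x & 7) != 0 ? 1 : 0);
    }

    int main() { return ceilDiv8(9) == 2 ? 0 : 1; }

In the IR tests above, every intermediate value is additionally kept alive by a call to @use_i32 or @use_vec, so the rewrite would strictly add an add plus a second shift; the CHECK lines confirm InstCombine leaves those functions untouched.
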
From e26a702d96f612460ac6750b000b0a6db13acee7 Mon Sep 17 00:00:00 2001
From: gaynor-anthropic
Date: Mon, 16 Jun 2025 14:03:15 -0700
Subject: [PATCH 6/7] Apply suggestions from code review

Co-authored-by: Nikita Popov
---
 llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 8 +++-----
 llvm/test/Transforms/InstCombine/add.ll               | 4 ++--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 12994e280f7fb..5dae5f2621a3e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1791,8 +1791,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
   // (X >> log2(N)) + zext(X & (N-1) != 0) --> (X + (N-1)) >> log2(N)
   // This is valid when adding (N-1) to X doesn't overflow.
   {
-    Value *X = nullptr;
-    const APInt *ShiftAmt = nullptr, *Mask = nullptr;
+    Value *X;
+    const APInt *ShiftAmt, *Mask;
     CmpPredicate Pred;
 
     // Match: (X >> C) + zext((X & Mask) != 0)
@@ -1806,9 +1806,7 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
 
       // Check if X + Mask doesn't overflow
       Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
-      bool WillNotOverflowUnsigned = willNotOverflowUnsignedAdd(X, MaskC, I);
-
-      if (WillNotOverflowUnsigned) {
+      if (willNotOverflowUnsignedAdd(X, MaskC, I)) {
         // (X + Mask) >> ShiftAmt
         Value *Add = Builder.CreateNUWAdd(X, MaskC);
         return BinaryOperator::CreateLShr(
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 74c022b07a9a3..47e5a8423b78f 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -4279,7 +4279,7 @@ declare i32 @llvm.ctlz.i32(i32, i1)
 ; This is only valid when x + (N-1) doesn't overflow
 
 ; Test with known range that prevents overflow
-define noundef range(i32 0, 100) i32 @ceil_div_by_8_known_range(i32 noundef range(i32 0, 100) %x) {
+define i32 @ceil_div_by_8_known_range(i32 range(i32 0, 100) %x) {
 ; CHECK-LABEL: @ceil_div_by_8_known_range(
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
 ; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[TMP1]], 3
@@ -4294,7 +4294,7 @@ define i32 @ceil_div_by_8_known_range(i32 range(i32 0, 100) %x) {
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 ; Test with the exact IR from the original testcase
-define noundef range(i32 0, 6) i32 @ceil_div_from_clz(i32 noundef %v) {
+define i32 @ceil_div_from_clz(i32 %v) {
 ; CHECK-LABEL: @ceil_div_from_clz(
 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 false)
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nuw nsw i32 39, [[CTLZ]]
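
With these cleanups applied, the whole guard is compact enough to read in one piece. Pulled out into a free function for reference (an editor-style sketch that assumes the LLVM headers; matchCeilDivByPow2 is a hypothetical name, not part of the patch):

    #include "llvm/IR/PatternMatch.h"

    using namespace llvm;
    using namespace llvm::PatternMatch;

    // m_c_Add tries both operand orders; m_OneUse requires the lshr to die
    // with the fold; m_Deferred(X) forces the and to see the same X as the
    // shift; and popcount(Mask) == ShiftAmt ties the mask to the shift amount.
    static bool matchCeilDivByPow2(Instruction &I, Value *&X,
                                   const APInt *&ShiftAmt, const APInt *&Mask) {
      return match(&I, m_c_Add(m_OneUse(m_LShr(m_Value(X), m_APInt(ShiftAmt))),
                               m_ZExt(m_SpecificICmp(
                                   ICmpInst::ICMP_NE,
                                   m_And(m_Deferred(X), m_LowBitMask(Mask)),
                                   m_ZeroInt())))) &&
             Mask->popcount() == *ShiftAmt;
    }

Note that only the lshr carries a one-use restriction; the final patch below adds a test showing the fold still fires when the zext chain has other users.
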
From 529b16b5e9ff12c499ec2ed22dc42e568668ff24 Mon Sep 17 00:00:00 2001
From: Alex Gaynor
Date: Mon, 16 Jun 2025 17:11:26 -0400
Subject: [PATCH 7/7] Add test case requested in code review

---
 llvm/test/Transforms/InstCombine/add.ll | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 47e5a8423b78f..a16e30bb49452 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -4492,6 +4492,26 @@ define i32 @ceil_div_commuted_multi_use(i32 range(i32 0, 100) %x) {
   ret i32 %r
 }
 
+; Multi-use test where only zext has multiple uses - should still optimize
+define i32 @ceil_div_zext_multi_use(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_zext_multi_use(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 7
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    call void @use_i32(i32 [[EXT]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[X]], 7
+; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[TMP1]], 3
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %shr = lshr i32 %x, 3
+  %and = and i32 %x, 7
+  %cmp = icmp ne i32 %and, 0
+  %ext = zext i1 %cmp to i32
+  call void @use_i32(i32 %ext)
+  %r = add i32 %shr, %ext
+  ret i32 %r
+}
+
 ; Multi-use with vector type
 define <2 x i32> @ceil_div_vec_multi_use(<2 x i32> range(i32 0, 1000) %x) {
 ; CHECK-LABEL: @ceil_div_vec_multi_use(