From fa77c2c10596acec00ee517297dc92d2bee09360 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sat, 27 Sep 2025 22:24:16 +0200 Subject: [PATCH 01/18] [InstCombine] Transform `vector.reduce.add (splat %0, 4)` into `shl i32 %0, 2` Fixes #160066 Whenever we have a vector with all the same elements, created with `insertelement` and `shufflevector` and the result type's element number is a power of two and we sum the vector, we have a multiplication by a power of two, which can be replaced with a left shift. --- .../InstCombine/InstCombineCalls.cpp | 33 +++++++++ .../InstCombine/vector-reductions.ll | 70 +++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 6ad493772d170..49f6b86fa8f30 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3761,6 +3761,39 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return replaceInstUsesWith(CI, Res); } } + + // Handle the case where a value is multiplied by a power of two. + // For example: + // %2 = insertelement <4 x i32> poison, i32 %0, i64 0 + // %3 = shufflevector <4 x i32> %2, poison, <4 x i32> zeroinitializer + // %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3) + // => + // %2 = shl i32 %0, 2 + Value *InputValue; + ArrayRef Mask; + ConstantInt *InsertionIdx; + assert(Arg->getType()->isVectorTy() && + "The vector.reduce.add intrinsic's argument must be a vector!"); + + if (match(Arg, m_Shuffle(m_InsertElt(m_Poison(), m_Value(InputValue), + m_ConstantInt(InsertionIdx)), + m_Poison(), m_Mask(Mask)))) { + // It is only a multiplication if we add the same element over and over. 
+ bool AllElementsAreTheSameInMask = + std::all_of(Mask.begin(), Mask.end(), + [&Mask](int MaskElt) { return MaskElt == Mask[0]; }); + unsigned ReducedVectorLength = Mask.size(); + + if (AllElementsAreTheSameInMask && + InsertionIdx->getSExtValue() == Mask[0] && + isPowerOf2_32(ReducedVectorLength)) { + unsigned Pow2 = Log2_32(ReducedVectorLength); + Value *Res = Builder.CreateShl( + InputValue, Constant::getIntegerValue(InputValue->getType(), + APInt(32, Pow2))); + return replaceInstUsesWith(CI, Res); + } + } } [[fallthrough]]; } diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index 10f4aca72dbc7..2547403386106 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -308,3 +308,73 @@ define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) { %r = sub i32 %r0, %r1 ret i32 %r } + +define i32 @constant_multiplied_at_0(i32 %0) { +; CHECK-LABEL: @constant_multiplied_at_0( +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %2 = insertelement <4 x i32> poison, i32 %0, i64 0 + %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer + %4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3) + ret i32 %4 +} + +define i32 @constant_multiplied_at_0_two_pow8(i32 %0) { +; CHECK-LABEL: @constant_multiplied_at_0_two_pow8( +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %2 = insertelement <4 x i32> poison, i32 %0, i64 0 + %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <8 x i32> zeroinitializer + %4 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %3) + ret i32 %4 +} + + +define i32 @constant_multiplied_at_0_two_pow16(i32 %0) { +; CHECK-LABEL: @constant_multiplied_at_0_two_pow16( +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 4 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %2 = insertelement <4 x i32> 
poison, i32 %0, i64 0 + %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <16 x i32> zeroinitializer + %4 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3) + ret i32 %4 +} + + +define i32 @constant_multiplied_at_1(i32 %0) { +; CHECK-LABEL: @constant_multiplied_at_1( +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %2 = insertelement <4 x i32> poison, i32 %0, i64 1 + %3 = shufflevector <4 x i32> %2, <4 x i32> poison, + <4 x i32> + %4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3) + ret i32 %4 +} + +define i32 @negative_constant_multiplied_at_1(i32 %0) { +; CHECK-LABEL: @negative_constant_multiplied_at_1( +; CHECK-NEXT: ret i32 poison +; + %2 = insertelement <4 x i32> poison, i32 %0, i64 1 + %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer + %4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3) + ret i32 %4 +} + +define i32 @negative_constant_multiplied_non_power_of_2(i32 %0) { +; CHECK-LABEL: @negative_constant_multiplied_non_power_of_2( +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <6 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> [[TMP3]]) +; CHECK-NEXT: ret i32 [[TMP4]] +; + %2 = insertelement <4 x i32> poison, i32 %0, i64 0 + %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <6 x i32> zeroinitializer + %4 = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> %3) + ret i32 %4 +} From fb492be297083c777d3a856d0e8248099d8667df Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sat, 27 Sep 2025 23:17:09 +0200 Subject: [PATCH 02/18] Use getSplatValue and correctly construct APInt and add i64 test --- .../InstCombine/InstCombineCalls.cpp | 34 ++++++++----------- .../InstCombine/vector-reductions.ll | 11 ++++++ 2 files changed, 26 insertions(+), 19 deletions(-) diff --git 
a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 49f6b86fa8f30..4ca985c2e959d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -64,6 +64,7 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/KnownFPClass.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" @@ -3769,29 +3770,24 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3) // => // %2 = shl i32 %0, 2 - Value *InputValue; - ArrayRef Mask; - ConstantInt *InsertionIdx; assert(Arg->getType()->isVectorTy() && "The vector.reduce.add intrinsic's argument must be a vector!"); - if (match(Arg, m_Shuffle(m_InsertElt(m_Poison(), m_Value(InputValue), - m_ConstantInt(InsertionIdx)), - m_Poison(), m_Mask(Mask)))) { + if (Value *Splat = getSplatValue(Arg)) { // It is only a multiplication if we add the same element over and over. 
- bool AllElementsAreTheSameInMask = - std::all_of(Mask.begin(), Mask.end(), - [&Mask](int MaskElt) { return MaskElt == Mask[0]; }); - unsigned ReducedVectorLength = Mask.size(); - - if (AllElementsAreTheSameInMask && - InsertionIdx->getSExtValue() == Mask[0] && - isPowerOf2_32(ReducedVectorLength)) { - unsigned Pow2 = Log2_32(ReducedVectorLength); - Value *Res = Builder.CreateShl( - InputValue, Constant::getIntegerValue(InputValue->getType(), - APInt(32, Pow2))); - return replaceInstUsesWith(CI, Res); + ElementCount ReducedVectorElementCount = + static_cast(Arg->getType())->getElementCount(); + if (ReducedVectorElementCount.isFixed()) { + unsigned VectorSize = ReducedVectorElementCount.getFixedValue(); + if (isPowerOf2_32(VectorSize)) { + unsigned Pow2 = Log2_32(VectorSize); + Value *Res = Builder.CreateShl( + Splat, + Constant::getIntegerValue( + Splat->getType(), + APInt(Splat->getType()->getIntegerBitWidth(), Pow2))); + return replaceInstUsesWith(CI, Res); + } } } } diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index 2547403386106..d62dce01ac193 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -320,6 +320,17 @@ define i32 @constant_multiplied_at_0(i32 %0) { ret i32 %4 } +define i64 @constant_multiplied_at_0_64bits(i64 %0) { +; CHECK-LABEL: @constant_multiplied_at_0_64bits( +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 2 +; CHECK-NEXT: ret i64 [[TMP2]] +; + %2 = insertelement <4 x i64> poison, i64 %0, i64 0 + %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <4 x i32> zeroinitializer + %4 = tail call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %3) + ret i64 %4 +} + define i32 @constant_multiplied_at_0_two_pow8(i32 %0) { ; CHECK-LABEL: @constant_multiplied_at_0_two_pow8( ; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 3 From e9cc989038328391c207b413487b6c09feaa4611 Mon Sep 17 00:00:00 2001 From: 
Gabor Spaits Date: Sat, 27 Sep 2025 23:26:42 +0200 Subject: [PATCH 03/18] Address non power of 2 cases --- .../InstCombine/InstCombineCalls.cpp | 17 ++++++++++----- .../InstCombine/vector-reductions.ll | 21 +++++++++++++------ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4ca985c2e959d..74c263e86f4a4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3779,15 +3779,22 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { static_cast(Arg->getType())->getElementCount(); if (ReducedVectorElementCount.isFixed()) { unsigned VectorSize = ReducedVectorElementCount.getFixedValue(); + Type *SplatType = Splat->getType(); + unsigned SplatTypeWidth = SplatType->getIntegerBitWidth(); + Value *Res; + // Power of two is a special case. We can just use a left shif here. if (isPowerOf2_32(VectorSize)) { unsigned Pow2 = Log2_32(VectorSize); - Value *Res = Builder.CreateShl( - Splat, - Constant::getIntegerValue( - Splat->getType(), - APInt(Splat->getType()->getIntegerBitWidth(), Pow2))); + Res = Builder.CreateShl( + Splat, Constant::getIntegerValue(SplatType, + APInt(SplatTypeWidth, Pow2))); return replaceInstUsesWith(CI, Res); } + // Otherwise just multiply. 
+ Res = Builder.CreateMul( + Splat, Constant::getIntegerValue( + SplatType, APInt(SplatTypeWidth, VectorSize))); + return replaceInstUsesWith(CI, Res); } } } diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index d62dce01ac193..e071415d2d6c1 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -377,15 +377,24 @@ define i32 @negative_constant_multiplied_at_1(i32 %0) { ret i32 %4 } -define i32 @negative_constant_multiplied_non_power_of_2(i32 %0) { -; CHECK-LABEL: @negative_constant_multiplied_non_power_of_2( -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <6 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> [[TMP3]]) -; CHECK-NEXT: ret i32 [[TMP4]] +define i32 @constant_multiplied_non_power_of_2(i32 %0) { +; CHECK-LABEL: @constant_multiplied_non_power_of_2( +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0:%.*]], 6 +; CHECK-NEXT: ret i32 [[TMP2]] ; %2 = insertelement <4 x i32> poison, i32 %0, i64 0 %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <6 x i32> zeroinitializer %4 = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> %3) ret i32 %4 } + +define i64 @constant_multiplied_non_power_of_2_i64(i64 %0) { +; CHECK-LABEL: @constant_multiplied_non_power_of_2_i64( +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0:%.*]], 6 +; CHECK-NEXT: ret i64 [[TMP2]] +; + %2 = insertelement <4 x i64> poison, i64 %0, i64 0 + %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <6 x i32> zeroinitializer + %4 = tail call i64 @llvm.vector.reduce.add.v6i64(<6 x i64> %3) + ret i64 %4 +} From a8b32afe1930b3f922676ad12cc0d40f4c08fd31 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 08:55:10 +0200 Subject: [PATCH 04/18] Update comments and move assertion to a 
more fitting place --- .../Transforms/InstCombine/InstCombineCalls.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 74c263e86f4a4..4bb9ba39318aa 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3763,18 +3763,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } - // Handle the case where a value is multiplied by a power of two. - // For example: - // %2 = insertelement <4 x i32> poison, i32 %0, i64 0 - // %3 = shufflevector <4 x i32> %2, poison, <4 x i32> zeroinitializer - // %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3) + // Handle the case where a splat is summarized. In that case we have a + // multpilication. For example: %2 = insertelement <4 x i32> poison, i32 + // %0, i64 0 %3 = shufflevector <4 x i32> %2, poison, <4 x i32> + // zeroinitializer %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3) // => // %2 = shl i32 %0, 2 - assert(Arg->getType()->isVectorTy() && - "The vector.reduce.add intrinsic's argument must be a vector!"); - if (Value *Splat = getSplatValue(Arg)) { // It is only a multiplication if we add the same element over and over. 
+ assert(Arg->getType()->isVectorTy() && + "The vector.reduce.add intrinsic's argument must be a vector!"); ElementCount ReducedVectorElementCount = static_cast(Arg->getType())->getElementCount(); if (ReducedVectorElementCount.isFixed()) { From d11a108e80b763a537663e925b1cfdb05caa61ad Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 11:46:51 +0200 Subject: [PATCH 05/18] Remove redundant power of 2 case --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4bb9ba39318aa..d745598f8ffd9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3779,17 +3779,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { unsigned VectorSize = ReducedVectorElementCount.getFixedValue(); Type *SplatType = Splat->getType(); unsigned SplatTypeWidth = SplatType->getIntegerBitWidth(); - Value *Res; - // Power of two is a special case. We can just use a left shif here. - if (isPowerOf2_32(VectorSize)) { - unsigned Pow2 = Log2_32(VectorSize); - Res = Builder.CreateShl( - Splat, Constant::getIntegerValue(SplatType, - APInt(SplatTypeWidth, Pow2))); - return replaceInstUsesWith(CI, Res); - } - // Otherwise just multiply. 
- Res = Builder.CreateMul( + Value *Res = Builder.CreateMul( Splat, Constant::getIntegerValue( SplatType, APInt(SplatTypeWidth, VectorSize))); return replaceInstUsesWith(CI, Res); From 01eb5719465b322ba26fb77cbf2d79bbd1ff6b85 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 11:54:43 +0200 Subject: [PATCH 06/18] Use ConstantInt::get instead of Constant::getIntegerValue --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index d745598f8ffd9..a21f9f75e4bcc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3768,7 +3768,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // %0, i64 0 %3 = shufflevector <4 x i32> %2, poison, <4 x i32> // zeroinitializer %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3) // => - // %2 = shl i32 %0, 2 + // %2 = mul i32 %0, 4 if (Value *Splat = getSplatValue(Arg)) { // It is only a multiplication if we add the same element over and over. 
assert(Arg->getType()->isVectorTy() && @@ -3778,10 +3778,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (ReducedVectorElementCount.isFixed()) { unsigned VectorSize = ReducedVectorElementCount.getFixedValue(); Type *SplatType = Splat->getType(); - unsigned SplatTypeWidth = SplatType->getIntegerBitWidth(); - Value *Res = Builder.CreateMul( - Splat, Constant::getIntegerValue( - SplatType, APInt(SplatTypeWidth, VectorSize))); + Value *Res = + Builder.CreateMul(Splat, ConstantInt::get(SplatType, VectorSize)); return replaceInstUsesWith(CI, Res); } } From 0adef1dd16dc8c8530696557133e8d4da8c8df42 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 13:33:23 +0200 Subject: [PATCH 07/18] Add i1 test --- .../Transforms/InstCombine/vector-reductions.ll | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index e071415d2d6c1..75948a2575d42 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -398,3 +398,18 @@ define i64 @constant_multiplied_non_power_of_2_i64(i64 %0) { %4 = tail call i64 @llvm.vector.reduce.add.v6i64(<6 x i64> %3) ret i64 %4 } + +define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) { +; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1( +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> poison, i1 [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP6]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[TMP5:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[TMP4]]) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i8 [[TMP5]] to i1 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %2 = insertelement <8 x i1> poison, i1 %0, i32 0 + %3 = shufflevector <8 x i1> %2, <8 x i1> poison, <8 x i32> zeroinitializer + %4 = tail call i1 
@llvm.vector.reduce.add.v6i1(<8 x i1> %3) + ret i1 %4 +} From d2f235e2f6d8b5368fc7ec7e864cbe977489cba5 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 13:54:51 +0200 Subject: [PATCH 08/18] More small type tests --- .../InstCombine/vector-reductions.ll | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index 75948a2575d42..d786363075c1a 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -410,6 +410,46 @@ define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) { ; %2 = insertelement <8 x i1> poison, i1 %0, i32 0 %3 = shufflevector <8 x i1> %2, <8 x i1> poison, <8 x i32> zeroinitializer - %4 = tail call i1 @llvm.vector.reduce.add.v6i1(<8 x i1> %3) + %4 = tail call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> %3) ret i1 %4 } + +define i1 @constant_multiplied_non_power_of_2_i1x4(i1 %0) { +; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x4( +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> poison, i1 [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 +; CHECK-NEXT: [[TMP5:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i4 [[TMP5]] to i1 +; CHECK-NEXT: ret i1 [[TMP6]] +; + %2 = insertelement <4 x i1> poison, i1 %0, i32 0 + %3 = shufflevector <4 x i1> %2, <4 x i1> poison, <4 x i32> zeroinitializer + %4 = tail call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %3) + ret i1 %4 +} + +define i1 @constant_multiplied_non_power_of_2_i1x2(i1 %0) { +; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x2( +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> poison, i1 [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> 
poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i1> [[TMP3]] to i2 +; CHECK-NEXT: [[TMP5:%.*]] = call range(i2 0, -1) i2 @llvm.ctpop.i2(i2 [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i2 [[TMP5]] to i1 +; CHECK-NEXT: ret i1 [[TMP6]] +; + %2 = insertelement <2 x i1> poison, i1 %0, i32 0 + %3 = shufflevector <2 x i1> %2, <2 x i1> poison, <2 x i32> zeroinitializer + %4 = tail call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> %3) + ret i1 %4 +} + +define i2 @constant_multiplied_non_power_of_2_i2x4(i2 %0) { +; CHECK-LABEL: @constant_multiplied_non_power_of_2_i2x4( +; CHECK-NEXT: ret i2 0 +; + %2 = insertelement <4 x i2> poison, i2 %0, i32 0 + %3 = shufflevector <4 x i2> %2, <4 x i2> poison, <4 x i32> zeroinitializer + %4 = tail call i2 @llvm.vector.reduce.add.v4i2(<4 x i2> %3) + ret i2 %4 +} From 045f0efae1d6e6efa9fcc8456c1965a005d3c951 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 17:01:39 +0200 Subject: [PATCH 09/18] Throw out redundant i1 tests --- .../InstCombine/vector-reductions.ll | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index d786363075c1a..30f9e49f9fe10 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -414,36 +414,6 @@ define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) { ret i1 %4 } -define i1 @constant_multiplied_non_power_of_2_i1x4(i1 %0) { -; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x4( -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> poison, i1 [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 -; CHECK-NEXT: [[TMP5:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = trunc i4 [[TMP5]] to i1 -; CHECK-NEXT: 
ret i1 [[TMP6]] -; - %2 = insertelement <4 x i1> poison, i1 %0, i32 0 - %3 = shufflevector <4 x i1> %2, <4 x i1> poison, <4 x i32> zeroinitializer - %4 = tail call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %3) - ret i1 %4 -} - -define i1 @constant_multiplied_non_power_of_2_i1x2(i1 %0) { -; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x2( -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> poison, i1 [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i1> [[TMP3]] to i2 -; CHECK-NEXT: [[TMP5:%.*]] = call range(i2 0, -1) i2 @llvm.ctpop.i2(i2 [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = trunc i2 [[TMP5]] to i1 -; CHECK-NEXT: ret i1 [[TMP6]] -; - %2 = insertelement <2 x i1> poison, i1 %0, i32 0 - %3 = shufflevector <2 x i1> %2, <2 x i1> poison, <2 x i32> zeroinitializer - %4 = tail call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> %3) - ret i1 %4 -} - define i2 @constant_multiplied_non_power_of_2_i2x4(i2 %0) { ; CHECK-LABEL: @constant_multiplied_non_power_of_2_i2x4( ; CHECK-NEXT: ret i2 0 From 027efe7c0bfb0a377acf0ef4b71bc46124f13fb5 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 17:06:10 +0200 Subject: [PATCH 10/18] More consistent test naming --- .../InstCombine/vector-reductions.ll | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index 30f9e49f9fe10..5a2e3e73978a4 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -309,8 +309,8 @@ define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) { ret i32 %r } -define i32 @constant_multiplied_at_0(i32 %0) { -; CHECK-LABEL: @constant_multiplied_at_0( +define i32 @constant_multiplied_4xi32(i32 %0) { +; CHECK-LABEL: @constant_multiplied_4xi32( ; CHECK-NEXT: 
[[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2 ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -320,8 +320,8 @@ define i32 @constant_multiplied_at_0(i32 %0) { ret i32 %4 } -define i64 @constant_multiplied_at_0_64bits(i64 %0) { -; CHECK-LABEL: @constant_multiplied_at_0_64bits( +define i64 @constant_multiplied_4xi64(i64 %0) { +; CHECK-LABEL: @constant_multiplied_4xi64( ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 2 ; CHECK-NEXT: ret i64 [[TMP2]] ; @@ -331,8 +331,8 @@ define i64 @constant_multiplied_at_0_64bits(i64 %0) { ret i64 %4 } -define i32 @constant_multiplied_at_0_two_pow8(i32 %0) { -; CHECK-LABEL: @constant_multiplied_at_0_two_pow8( +define i32 @constant_multiplied_8xi32(i32 %0) { +; CHECK-LABEL: @constant_multiplied_8xi32( ; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 3 ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -343,8 +343,8 @@ define i32 @constant_multiplied_at_0_two_pow8(i32 %0) { } -define i32 @constant_multiplied_at_0_two_pow16(i32 %0) { -; CHECK-LABEL: @constant_multiplied_at_0_two_pow16( +define i32 @constant_multiplied_16xi32(i32 %0) { +; CHECK-LABEL: @constant_multiplied_16xi32( ; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 4 ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -355,8 +355,8 @@ define i32 @constant_multiplied_at_0_two_pow16(i32 %0) { } -define i32 @constant_multiplied_at_1(i32 %0) { -; CHECK-LABEL: @constant_multiplied_at_1( +define i32 @constant_multiplied_4xi32_at_idx1(i32 %0) { +; CHECK-LABEL: @constant_multiplied_4xi32_at_idx1( ; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2 ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -367,8 +367,8 @@ define i32 @constant_multiplied_at_1(i32 %0) { ret i32 %4 } -define i32 @negative_constant_multiplied_at_1(i32 %0) { -; CHECK-LABEL: @negative_constant_multiplied_at_1( +define i32 @negative_constant_multiplied_4xi32(i32 %0) { +; CHECK-LABEL: @negative_constant_multiplied_4xi32( ; CHECK-NEXT: ret i32 poison ; %2 = insertelement <4 x i32> poison, i32 %0, i64 1 @@ -377,8 +377,8 @@ define i32 @negative_constant_multiplied_at_1(i32 
%0) { ret i32 %4 } -define i32 @constant_multiplied_non_power_of_2(i32 %0) { -; CHECK-LABEL: @constant_multiplied_non_power_of_2( +define i32 @constant_multiplied_6xi32(i32 %0) { +; CHECK-LABEL: @constant_multiplied_6xi32( ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0:%.*]], 6 ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -388,8 +388,8 @@ define i32 @constant_multiplied_non_power_of_2(i32 %0) { ret i32 %4 } -define i64 @constant_multiplied_non_power_of_2_i64(i64 %0) { -; CHECK-LABEL: @constant_multiplied_non_power_of_2_i64( +define i64 @constant_multiplied_6xi64(i64 %0) { +; CHECK-LABEL: @constant_multiplied_6xi64( ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0:%.*]], 6 ; CHECK-NEXT: ret i64 [[TMP2]] ; @@ -399,14 +399,14 @@ define i64 @constant_multiplied_non_power_of_2_i64(i64 %0) { ret i64 %4 } -define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) { -; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1( -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> poison, i1 [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP6]], <8 x i1> poison, <8 x i32> zeroinitializer +define i1 @constant_multiplied_8xi1(i1 %0) { +; CHECK-LABEL: @constant_multiplied_8xi1( +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 ; CHECK-NEXT: [[TMP5:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[TMP4]]) -; CHECK-NEXT: [[TMP2:%.*]] = trunc i8 [[TMP5]] to i1 -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = trunc i8 [[TMP5]] to i1 +; CHECK-NEXT: ret i1 [[TMP6]] ; %2 = insertelement <8 x i1> poison, i1 %0, i32 0 %3 = shufflevector <8 x i1> %2, <8 x i1> poison, <8 x i32> zeroinitializer @@ -414,8 +414,8 @@ define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) { ret i1 %4 } -define i2 @constant_multiplied_non_power_of_2_i2x4(i2 %0) { -; CHECK-LABEL: 
@constant_multiplied_non_power_of_2_i2x4( +define i2 @constant_multiplied_4xi2(i2 %0) { +; CHECK-LABEL: @constant_multiplied_4xi2( ; CHECK-NEXT: ret i2 0 ; %2 = insertelement <4 x i2> poison, i2 %0, i32 0 From 8e2c2e57ac049abd152a836f4a2c5da36b8dedb9 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 17:07:46 +0200 Subject: [PATCH 11/18] Use cast instead of static_cast --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index a21f9f75e4bcc..4de8758d170fa 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3774,7 +3774,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { assert(Arg->getType()->isVectorTy() && "The vector.reduce.add intrinsic's argument must be a vector!"); ElementCount ReducedVectorElementCount = - static_cast(Arg->getType())->getElementCount(); + cast(Arg->getType())->getElementCount(); if (ReducedVectorElementCount.isFixed()) { unsigned VectorSize = ReducedVectorElementCount.getFixedValue(); Type *SplatType = Splat->getType(); From ff6491be34198bb5a1a5c99d92a9bc146cb96a73 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 17:09:48 +0200 Subject: [PATCH 12/18] Use BinaryOperator::CreateMul instead of using Builder and replaceInstUsesWith --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4de8758d170fa..bfc7ed69d8b79 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3778,9 +3778,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (ReducedVectorElementCount.isFixed()) { 
unsigned VectorSize = ReducedVectorElementCount.getFixedValue(); Type *SplatType = Splat->getType(); - Value *Res = - Builder.CreateMul(Splat, ConstantInt::get(SplatType, VectorSize)); - return replaceInstUsesWith(CI, Res); + return BinaryOperator::CreateMul( + Splat, ConstantInt::get(SplatType, VectorSize)); } } } From 38ca5ce0a39f0a9aa0c27b7e0fc66a937eaaa306 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 17:17:38 +0200 Subject: [PATCH 13/18] Extend testing --- .../InstCombine/vector-reductions.ll | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index 5a2e3e73978a4..4355772fd23c1 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -320,6 +320,17 @@ define i32 @constant_multiplied_4xi32(i32 %0) { ret i32 %4 } +define i32 @constant_multiplied_3xi32(i32 %0) { +; CHECK-LABEL: @constant_multiplied_3xi32( +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %2 = insertelement <3 x i32> poison, i32 %0, i64 0 + %3 = shufflevector <3 x i32> %2, <3 x i32> poison, <3 x i32> zeroinitializer + %4 = tail call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %3) + ret i32 %4 +} + define i64 @constant_multiplied_4xi64(i64 %0) { ; CHECK-LABEL: @constant_multiplied_4xi64( ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 2 @@ -423,3 +434,35 @@ define i2 @constant_multiplied_4xi2(i2 %0) { %4 = tail call i2 @llvm.vector.reduce.add.v4i2(<4 x i2> %3) ret i2 %4 } + +define i2 @constant_multiplied_5xi2(i2 %0) { +; CHECK-LABEL: @constant_multiplied_5xi2( +; CHECK-NEXT: ret i2 [[TMP0:%.*]] +; + %2 = insertelement <5 x i2> poison, i2 %0, i64 0 + %3 = shufflevector <5 x i2> %2, <5 x i2> poison, <5 x i32> zeroinitializer + %4 = tail call i2 @llvm.vector.reduce.add.v5i2(<5 x i2> %3) + ret i2 %4 +} + +define i2 
@constant_multiplied_6xi2(i2 %0) { +; CHECK-LABEL: @constant_multiplied_6xi2( +; CHECK-NEXT: [[TMP2:%.*]] = shl i2 [[TMP0:%.*]], 1 +; CHECK-NEXT: ret i2 [[TMP2]] +; + %2 = insertelement <6 x i2> poison, i2 %0, i64 0 + %3 = shufflevector <6 x i2> %2, <6 x i2> poison, <6 x i32> zeroinitializer + %4 = tail call i2 @llvm.vector.reduce.add.v6i2(<6 x i2> %3) + ret i2 %4 +} + +define i2 @constant_multiplied_7xi2(i2 %0) { +; CHECK-LABEL: @constant_multiplied_7xi2( +; CHECK-NEXT: [[TMP2:%.*]] = sub i2 0, [[TMP0:%.*]] +; CHECK-NEXT: ret i2 [[TMP2]] +; + %2 = insertelement <7 x i2> poison, i2 %0, i64 0 + %3 = shufflevector <7 x i2> %2, <7 x i2> poison, <7 x i32> zeroinitializer + %4 = tail call i2 @llvm.vector.reduce.add.v7i2(<7 x i2> %3) + ret i2 %4 +} From 78a00dd7c76f180b2f5c4fed46f2e9d374ac9ca9 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 17:44:23 +0200 Subject: [PATCH 14/18] Remove assertion --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bfc7ed69d8b79..b838d06e1d501 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3771,8 +3771,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // %2 = mul i32 %0, 4 if (Value *Splat = getSplatValue(Arg)) { // It is only a multiplication if we add the same element over and over. 
- assert(Arg->getType()->isVectorTy() && - "The vector.reduce.add intrinsic's argument must be a vector!"); ElementCount ReducedVectorElementCount = cast<VectorType>(Arg->getType())->getElementCount(); if (ReducedVectorElementCount.isFixed()) { From b86f985c87aea6c31f94ed3294f949a6823f744d Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 20:59:14 +0200 Subject: [PATCH 15/18] Remove redundant variable and comment --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index b838d06e1d501..8b0385b402a7d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3763,21 +3763,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } - // Handle the case where a splat is summarized. In that case we have a - // multpilication. For example: %2 = insertelement <4 x i32> poison, i32 - // %0, i64 0 %3 = shufflevector <4 x i32> %2, poison, <4 x i32> - // zeroinitializer %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3) - // => - // %2 = mul i32 %0, 4 + // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N) if (Value *Splat = getSplatValue(Arg)) { - // It is only a multiplication if we add the same element over and over. 
ElementCount ReducedVectorElementCount = cast<VectorType>(Arg->getType())->getElementCount(); if (ReducedVectorElementCount.isFixed()) { unsigned VectorSize = ReducedVectorElementCount.getFixedValue(); - Type *SplatType = Splat->getType(); - return BinaryOperator::CreateMul( - Splat, ConstantInt::get(SplatType, VectorSize)); + return BinaryOperator::CreateMul(Splat, ConstantInt::get(Splat->getType(), VectorSize)); } } } From 2dd6052b11123563b62ac2730d75bd89f2b50034 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 21:11:35 +0200 Subject: [PATCH 16/18] Add a vscale test case --- .../Transforms/InstCombine/vector-reductions.ll | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll index 4355772fd23c1..f1e0dd9bd06d7 100644 --- a/llvm/test/Transforms/InstCombine/vector-reductions.ll +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -466,3 +466,16 @@ define i2 @constant_multiplied_7xi2(i2 %0) { %4 = tail call i2 @llvm.vector.reduce.add.v7i2(<7 x i2> %3) ret i2 %4 } + +define i32 @negative_scalable_vector(i32 %0) { +; CHECK-LABEL: @negative_scalable_vector( +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP3]]) +; CHECK-NEXT: ret i32 [[TMP4]] +; + %2 = insertelement <vscale x 4 x i32> poison, i32 %0, i64 0 + %3 = shufflevector <vscale x 4 x i32> %2, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %4 = tail call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %3) + ret i32 %4 +} From 91e53d477cf54ba7ee5e1bb3cd1fb127630abca8 Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 21:21:11 +0200 Subject: [PATCH 17/18] Formatting --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 8b0385b402a7d..c7ac3c92548a7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3769,7 +3769,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { cast<VectorType>(Arg->getType())->getElementCount(); if (ReducedVectorElementCount.isFixed()) { unsigned VectorSize = ReducedVectorElementCount.getFixedValue(); - return BinaryOperator::CreateMul(Splat, ConstantInt::get(Splat->getType(), VectorSize)); + return BinaryOperator::CreateMul( + Splat, ConstantInt::get(Splat->getType(), VectorSize)); } } } From 8c318480df603c5d1eeb108f06a1103b6b865c3e Mon Sep 17 00:00:00 2001 From: Gabor Spaits Date: Sun, 28 Sep 2025 21:34:06 +0200 Subject: [PATCH 18/18] Rename a variable --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index c7ac3c92548a7..6316ac1f8f524 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3765,10 +3765,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N) if (Value *Splat = getSplatValue(Arg)) { - ElementCount ReducedVectorElementCount = + ElementCount VecToReduceCount = cast<VectorType>(Arg->getType())->getElementCount(); - if (ReducedVectorElementCount.isFixed()) { - unsigned VectorSize = ReducedVectorElementCount.getFixedValue(); + if (VecToReduceCount.isFixed()) { + unsigned VectorSize = VecToReduceCount.getFixedValue(); return BinaryOperator::CreateMul( Splat, ConstantInt::get(Splat->getType(), VectorSize)); }