[SLP][REVEC] Honor slot type when computing NumberOfParts#193085
Merged
alexey-bataev merged 1 commit into main on Apr 20, 2026
Merged
Conversation
Created using spr 1.3.7
Member
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Alexey Bataev (alexey-bataev). Changes: The getNumberOfParts() helper split VecTy without considering that a REVEC slot is a FixedVectorType, so NumParts could fall on a non-slot boundary. Patch is 26.28 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/193085.diff — 3 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index dd9cd8b6a7307..939303eedcce7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2021,13 +2021,15 @@ static bool hasFullVectorsOrPowerOf2(const TargetTransformInfo &TTI, Type *Ty,
/// registers, returns 1.
static unsigned
getNumberOfParts(const TargetTransformInfo &TTI, VectorType *VecTy,
+ Type *ScalarTy,
const unsigned Limit = std::numeric_limits<unsigned>::max()) {
unsigned NumParts = TTI.getNumberOfParts(VecTy);
if (NumParts == 0 || NumParts >= Limit)
return 1;
unsigned Sz = getNumElements(VecTy);
- if (NumParts >= Sz || Sz % NumParts != 0 ||
- !hasFullVectorsOrPowerOf2(TTI, VecTy->getElementType(), Sz / NumParts))
+ unsigned ScalarSz = getNumElements(ScalarTy);
+ if (NumParts >= Sz || Sz % NumParts != 0 || (Sz / NumParts) % ScalarSz != 0 ||
+ !hasFullVectorsOrPowerOf2(TTI, ScalarTy, Sz / NumParts))
return 1;
return NumParts;
}
@@ -3902,8 +3904,10 @@ class slpvectorizer::BoUpSLP {
SmallPtrSetImpl<Value *> &CheckedExtracts);
/// Estimates spill/reload cost from vector register pressure for \p E at the
- /// point of emitting its vector result type \p FinalVecTy.
- InstructionCost getVectorSpillReloadCost(const TreeEntry *E,
+ /// point of emitting its vector result type \p FinalVecTy. \p ScalarTy is the
+ /// scalar/slot type used to widen into \p VecTy/\p FinalVecTy and may itself
+ /// be a FixedVectorType in ReVec mode or an adjusted type due to MinBWs.
+ InstructionCost getVectorSpillReloadCost(const TreeEntry *E, Type *ScalarTy,
VectorType *VecTy,
VectorType *FinalVecTy,
TTI::TargetCostKind CostKind) const;
@@ -6594,7 +6598,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE,
if (!isValidElementType(ScalarTy))
return std::nullopt;
auto *VecTy = getWidenedType(ScalarTy, NumScalars);
- unsigned NumParts = ::getNumberOfParts(*TTI, VecTy, NumScalars);
+ unsigned NumParts = ::getNumberOfParts(*TTI, VecTy, ScalarTy, NumScalars);
SmallVector<int> ExtractMask;
SmallVector<int> Mask;
SmallVector<SmallVector<const TreeEntry *>> Entries;
@@ -8159,8 +8163,10 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom,
}
}
if (Sz == 2 && TE.getVectorFactor() == 4 &&
- ::getNumberOfParts(*TTI, getWidenedType(TE.Scalars.front()->getType(),
- 2 * TE.getVectorFactor())) == 1)
+ ::getNumberOfParts(*TTI,
+ getWidenedType(getValueType(TE.Scalars.front()),
+ 2 * TE.getVectorFactor()),
+ getValueType(TE.Scalars.front())) == 1)
return std::nullopt;
if (TE.ReuseShuffleIndices.size() % Sz != 0)
return std::nullopt;
@@ -14429,7 +14435,8 @@ void BoUpSLP::transformNodes() {
bool IsTwoRegisterSplat = true;
if (IsSplat && VF == 2) {
unsigned NumRegs2VF = ::getNumberOfParts(
- *TTI, getWidenedType(Slice.front()->getType(), 2 * VF));
+ *TTI, getWidenedType(getValueType(Slice.front()), 2 * VF),
+ getValueType(Slice.front()));
IsTwoRegisterSplat = NumRegs2VF == 2;
}
if (Slices.empty() || !IsSplat || !IsTwoRegisterSplat ||
@@ -15563,7 +15570,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
- unsigned NumParts = ::getNumberOfParts(TTI, MaskVecTy, Mask.size());
+ unsigned NumParts =
+ ::getNumberOfParts(TTI, MaskVecTy, ScalarTy, Mask.size());
unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
const auto *It = find_if(Mask, not_equal_to(PoisonMaskElem));
unsigned Part = std::distance(Mask.begin(), It) / SliceSize;
@@ -15577,7 +15585,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
- unsigned NumParts = ::getNumberOfParts(TTI, MaskVecTy, Mask.size());
+ unsigned NumParts =
+ ::getNumberOfParts(TTI, MaskVecTy, ScalarTy, Mask.size());
unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
const auto *It = find_if(Mask, not_equal_to(PoisonMaskElem));
unsigned Part = std::distance(Mask.begin(), It) / SliceSize;
@@ -15892,8 +15901,8 @@ unsigned BoUpSLP::getScaleToLoopIterations(const TreeEntry &TE, Value *Scalar,
}
InstructionCost
-BoUpSLP::getVectorSpillReloadCost(const TreeEntry *E, VectorType *VecTy,
- VectorType *FinalVecTy,
+BoUpSLP::getVectorSpillReloadCost(const TreeEntry *E, Type *ScalarTy,
+ VectorType *VecTy, VectorType *FinalVecTy,
TTI::TargetCostKind CostKind) const {
InstructionCost SpillsReloads = 0;
@@ -15919,7 +15928,8 @@ BoUpSLP::getVectorSpillReloadCost(const TreeEntry *E, VectorType *VecTy,
PressureByClass[RegClass] += Parts;
};
- auto GetEntryVecTy = [&](const TreeEntry *TE) -> VectorType * {
+ auto GetEntryVecTy =
+ [&](const TreeEntry *TE) -> std::pair<Type *, VectorType *> {
Type *ScalarTy = getValueType(TE->Scalars.front());
auto BWIt = MinBWs.find(TE);
if (BWIt != MinBWs.end()) {
@@ -15928,7 +15938,8 @@ BoUpSLP::getVectorSpillReloadCost(const TreeEntry *E, VectorType *VecTy,
if (VTy)
ScalarTy = getWidenedType(ScalarTy, VTy->getNumElements());
}
- return getWidenedType(ScalarTy, TE->getVectorFactor());
+ return std::make_pair(ScalarTy,
+ getWidenedType(ScalarTy, TE->getVectorFactor()));
};
if (E->State == TreeEntry::SplitVectorize) {
@@ -15937,8 +15948,8 @@ BoUpSLP::getVectorSpillReloadCost(const TreeEntry *E, VectorType *VecTy,
if (!CountedOpEntries.insert(OpTE).second)
continue;
- auto *OpVecTy = GetEntryVecTy(OpTE);
- const unsigned Parts = ::getNumberOfParts(*TTI, OpVecTy);
+ auto [ScalarTy, OpVecTy] = GetEntryVecTy(OpTE);
+ const unsigned Parts = ::getNumberOfParts(*TTI, OpVecTy, ScalarTy);
if (Parts == 0)
continue;
const unsigned RC =
@@ -15951,8 +15962,8 @@ BoUpSLP::getVectorSpillReloadCost(const TreeEntry *E, VectorType *VecTy,
SmallDenseMap<unsigned, unsigned> MaxOpPressureByClass;
for (unsigned Idx : seq<unsigned>(E->getNumOperands())) {
const TreeEntry *OpTE = getOperandEntry(E, Idx);
- auto *OpVecTy = GetEntryVecTy(OpTE);
- const unsigned Parts = ::getNumberOfParts(*TTI, OpVecTy);
+ auto [ScalarTy, OpVecTy] = GetEntryVecTy(OpTE);
+ const unsigned Parts = ::getNumberOfParts(*TTI, OpVecTy, ScalarTy);
if (Parts == 0)
continue;
const unsigned RC =
@@ -15978,7 +15989,7 @@ BoUpSLP::getVectorSpillReloadCost(const TreeEntry *E, VectorType *VecTy,
if (!CountedOpEntries.insert(OpTE).second)
continue;
auto *OpVecTy = getWidenedType(Op->getType(), Ops.size());
- const unsigned Parts = ::getNumberOfParts(*TTI, OpVecTy);
+ const unsigned Parts = ::getNumberOfParts(*TTI, OpVecTy, Op->getType());
if (Parts == 0)
continue;
const unsigned RC =
@@ -15988,13 +15999,14 @@ BoUpSLP::getVectorSpillReloadCost(const TreeEntry *E, VectorType *VecTy,
}
if (E->getOpcode() != Instruction::Load) {
- const unsigned ResParts = ::getNumberOfParts(*TTI, VecTy);
+ const unsigned ResParts = ::getNumberOfParts(*TTI, VecTy, ScalarTy);
if (ResParts != 0) {
const unsigned RC = TTI->getRegisterClassForType(/*Vector=*/true, VecTy);
AddPartsToClass(RC, ResParts);
}
if (VecTy != FinalVecTy) {
- const unsigned FinalResParts = ::getNumberOfParts(*TTI, FinalVecTy);
+ const unsigned FinalResParts =
+ ::getNumberOfParts(*TTI, FinalVecTy, ScalarTy);
if (FinalResParts != 0) {
const unsigned RC =
TTI->getRegisterClassForType(/*Vector=*/true, FinalVecTy);
@@ -16052,7 +16064,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
const InstructionCost SpillsReloads =
- getVectorSpillReloadCost(E, VecTy, FinalVecTy, CostKind);
+ getVectorSpillReloadCost(E, ScalarTy, VecTy, FinalVecTy, CostKind);
if (E->isGather() || TransformedToGatherNodes.contains(E)) {
if (allConstant(VL))
return 0;
@@ -16342,7 +16354,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
unsigned const NumElts = SrcVecTy->getNumElements();
unsigned const NumScalars = VL.size();
- unsigned NumOfParts = ::getNumberOfParts(*TTI, SrcVecTy);
+ unsigned NumOfParts =
+ ::getNumberOfParts(*TTI, SrcVecTy, VL0->getOperand(1)->getType());
SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
unsigned OffsetBeg = *getElementIndex(VL.front());
@@ -21133,7 +21146,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
SmallVector<SmallVector<const TreeEntry *>> Entries;
Type *OrigScalarTy = GatheredScalars.front()->getType();
auto *VecTy = getWidenedType(ScalarTy, GatheredScalars.size());
- unsigned NumParts = ::getNumberOfParts(*TTI, VecTy, GatheredScalars.size());
+ unsigned NumParts =
+ ::getNumberOfParts(*TTI, VecTy, ScalarTy, GatheredScalars.size());
if (!all_of(GatheredScalars, IsaPred<UndefValue>)) {
// Check for gathered extracts.
bool Resized = false;
@@ -21166,8 +21180,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
Resized = true;
GatheredScalars.append(VF - GatheredScalars.size(),
PoisonValue::get(OrigScalarTy));
- NumParts =
- ::getNumberOfParts(*TTI, getWidenedType(OrigScalarTy, VF), VF);
+ NumParts = ::getNumberOfParts(
+ *TTI, getWidenedType(OrigScalarTy, VF), OrigScalarTy, VF);
}
}
}
@@ -21395,9 +21409,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
}
}
if (!GatherShuffles.empty()) {
- unsigned SliceSize =
- getPartNumElems(E->Scalars.size(),
- ::getNumberOfParts(*TTI, VecTy, E->Scalars.size()));
+ unsigned SliceSize = getPartNumElems(
+ E->Scalars.size(),
+ ::getNumberOfParts(*TTI, VecTy, ScalarTy, E->Scalars.size()));
SmallVector<int> VecMask(Mask.size(), PoisonMaskElem);
for (const auto [I, TEs] : enumerate(Entries)) {
if (TEs.empty()) {
@@ -23631,10 +23645,13 @@ void BoUpSLP::optimizeGatherSequence() {
// Check if the last undefs actually change the final number of used vector
// registers.
return SM1.size() - LastUndefsCnt > 1 &&
- ::getNumberOfParts(*TTI, SI1->getType()) ==
+ ::getNumberOfParts(*TTI, SI1->getType(),
+ SI1->getType()->getElementType()) ==
::getNumberOfParts(
- *TTI, getWidenedType(SI1->getType()->getElementType(),
- SM1.size() - LastUndefsCnt));
+ *TTI,
+ getWidenedType(SI1->getType()->getElementType(),
+ SM1.size() - LastUndefsCnt),
+ SI1->getType()->getElementType());
};
// Perform O(N^2) search over the gather/shuffle sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
@@ -24948,12 +24965,14 @@ bool BoUpSLP::collectValuesToDemote(
const unsigned VF = E.Scalars.size();
Type *OrigScalarTy = E.Scalars.front()->getType();
if (UniqueBases.size() <= 2 ||
- ::getNumberOfParts(*TTI, getWidenedType(OrigScalarTy, VF)) >=
+ ::getNumberOfParts(*TTI, getWidenedType(OrigScalarTy, VF),
+ OrigScalarTy) >=
::getNumberOfParts(
*TTI,
getWidenedType(
IntegerType::get(OrigScalarTy->getContext(), BitWidth),
- VF))) {
+ VF),
+ IntegerType::get(OrigScalarTy->getContext(), BitWidth))) {
ToDemote.push_back(E.Idx);
return true;
}
@@ -25395,7 +25414,6 @@ void BoUpSLP::computeMinimumValueSizes() {
unsigned VF = E.getVectorFactor();
Type *ScalarTy = E.Scalars.front()->getType();
- unsigned ScalarTyNumElements = getNumElements(ScalarTy);
auto *TreeRootIT = dyn_cast<IntegerType>(ScalarTy->getScalarType());
if (!TreeRootIT)
return 0u;
@@ -25404,8 +25422,8 @@ void BoUpSLP::computeMinimumValueSizes() {
[&](Value *V) { return AnalyzedMinBWVals.contains(V); }))
return 0u;
- unsigned NumParts = ::getNumberOfParts(
- *TTI, getWidenedType(TreeRootIT, VF * ScalarTyNumElements));
+ unsigned NumParts =
+ ::getNumberOfParts(*TTI, getWidenedType(ScalarTy, VF), ScalarTy);
// The maximum bit width required to represent all the values that can be
// demoted without loss of precision. It would be safe to truncate the roots
@@ -25479,9 +25497,11 @@ void BoUpSLP::computeMinimumValueSizes() {
if (NumParts > 1 &&
NumParts ==
::getNumberOfParts(
- *TTI, getWidenedType(IntegerType::get(F->getContext(),
- bit_ceil(MaxBitWidth)),
- VF)))
+ *TTI,
+ getWidenedType(
+ IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)),
+ VF),
+ IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth))))
return 0u;
unsigned Opcode = E.getOpcode();
@@ -27818,14 +27838,14 @@ class HorizontalReduction {
ReduxWidth =
getFloorFullVectorNumberOfElements(TTI, ScalarTy, ReduxWidth);
VectorType *Tp = getWidenedType(ScalarTy, ReduxWidth);
- NumParts = ::getNumberOfParts(TTI, Tp);
+ NumParts = ::getNumberOfParts(TTI, Tp, ScalarTy);
NumRegs =
TTI.getNumberOfRegisters(TTI.getRegisterClassForType(true, Tp));
while (NumParts > NumRegs) {
assert(ReduxWidth > 0 && "ReduxWidth is unexpectedly 0.");
ReduxWidth = bit_floor(ReduxWidth - 1);
VectorType *Tp = getWidenedType(ScalarTy, ReduxWidth);
- NumParts = ::getNumberOfParts(TTI, Tp);
+ NumParts = ::getNumberOfParts(TTI, Tp, ScalarTy);
NumRegs =
TTI.getNumberOfRegisters(TTI.getRegisterClassForType(true, Tp));
}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/revec-reductions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/revec-reductions.ll
index 92701dadbfc1b..4f0e38ebf34bd 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/revec-reductions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/revec-reductions.ll
@@ -4,55 +4,25 @@
define <16 x i64> @test() {
; CHECK-LABEL: define <16 x i64> @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = call <64 x i64> @llvm.smin.v64i64(<64 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, <64 x i64> zeroinitializer)
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 0, i32 16, i32 32, i32 48>
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 1, i32 17, i32 33, i32 49>
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP4]])
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i64> [[TMP3]], i64 [[TMP5]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 2, i32 18, i32 34, i32 50>
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i64> [[TMP6]], i64 [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 3, i32 19, i32 35, i32 51>
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP10]])
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i64> [[TMP9]], i64 [[TMP11]], i64 3
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 4, i32 20, i32 36, i32 52>
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP13]])
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i64> [[TMP12]], i64 [[TMP14]], i64 4
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 5, i32 21, i32 37, i32 53>
-; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP16]])
-; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i64> [[TMP15]], i64 [[TMP17]], i64 5
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 6, i32 22, i32 38, i32 54>
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP19]])
-; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i64> [[TMP18]], i64 [[TMP20]], i64 6
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 7, i32 23, i32 39, i32 55>
-; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP22]])
-; CHECK-NEXT: [[TMP24:%.*]] = insertelement <16 x i64> [[TMP21]], i64 [[TMP23]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 8, i32 24, i32 40, i32 56>
-; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP25]])
-; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x i64> [[TMP24]], i64 [[TMP26]], i64 8
-; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 9, i32 25, i32 41, i32 57>
-; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP28]])
-; CHECK-NEXT: [[TMP30:%.*]] = insertelement <16 x i64> [[TMP27]], i64 [[TMP29]], i64 9
-; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 10, i32 26, i32 42, i32 58>
-; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP31]])
-; CHECK-NEXT: [[TMP33:%.*]] = insertelement <16 x i64> [[TMP30]], i64 [[TMP32]], i64 10
-; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 11, i32 27, i32 43, i32 59>
-; CHECK-NEXT: [[TMP35:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP34]])
-; CHECK-NEXT: [[TMP36:%.*]] = insertelement <16 x i64> [[TMP33]], i64 [[TMP35]], i64 11
-; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 12, i32 28, i32 44, i32 60>
-; CHECK-NEXT: [[TMP38:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP37]])
-; CHECK-NEXT: [[TMP39:%.*]] = insertelement <16 x i64> [[TMP36]], i64 [[TMP38]], i64 12
-; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <64 x i64> [[TMP0]], <64 x i64> poison, <4 x i32> <i32 13, i32 29, i32 45, i32 61>
-; CHECK-NEXT: [[TMP41:%.*]] = call i...
[truncated]
|
llvm-sync bot
pushed a commit
to arm/arm-toolchain
that referenced
this pull request
Apr 20, 2026
The getNumberOfParts() helper split VecTy without considering that a REVEC slot is a FixedVectorType, so NumParts could fall on a non-slot boundary. Add an explicit ScalarTy argument, require (Sz / NumParts) to be a multiple of getNumElements(ScalarTy), and use ScalarTy for the hasFullVectorsOrPowerOf2 check. For non-REVEC callers ScalarSz == 1 and behavior is unchanged. Fixes #192963. Reviewers: Pull Request: llvm/llvm-project#193085
cpullvm-upstream-sync bot
pushed a commit
to navaneethshan/cpullvm-toolchain-1
that referenced
this pull request
Apr 20, 2026
The getNumberOfParts() helper split VecTy without considering that a REVEC slot is a FixedVectorType, so NumParts could fall on a non-slot boundary. Add an explicit ScalarTy argument, require (Sz / NumParts) to be a multiple of getNumElements(ScalarTy), and use ScalarTy for the hasFullVectorsOrPowerOf2 check. For non-REVEC callers ScalarSz == 1 and behavior is unchanged. Fixes #192963. Reviewers: Pull Request: llvm/llvm-project#193085
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
The getNumberOfParts() helper split VecTy without considering that a
REVEC slot is a FixedVectorType, so NumParts could fall on a non-slot
boundary.
Add an explicit ScalarTy argument, require (Sz / NumParts) to be a
multiple of getNumElements(ScalarTy), and use ScalarTy for the
hasFullVectorsOrPowerOf2 check. For non-REVEC callers ScalarSz == 1 and
behavior is unchanged.
Fixes #192963.