-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP]Support minbitwidth analisys for buildvector nodes. #88504
[SLP]Support minbitwidth analisys for buildvector nodes. #88504
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-systemz Author: Alexey Bataev (alexey-bataev) ChangesMetric: size..text Program size..text Less is better. Replaces buildvector <p x in> + trunc <p x in> to <p x im> sequences to Patch is 41.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88504.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index df891371fdf758..9d25a8ef3ec09e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2465,12 +2465,12 @@ class BoUpSLP {
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
template <typename BVTy, typename ResTy, typename... Args>
- ResTy processBuildVector(const TreeEntry *E, Args &...Params);
+ ResTy processBuildVector(const TreeEntry *E, Type *ScalarTy, Args &...Params);
/// Create a new vector from a list of scalar values. Produces a sequence
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
- Value *createBuildVector(const TreeEntry *E);
+ Value *createBuildVector(const TreeEntry *E, Type *ScalarTy);
/// Returns the instruction in the bundle, which can be used as a base point
/// for scheduling. Usually it is the last instruction in the bundle, except
@@ -2534,7 +2534,8 @@ class BoUpSLP {
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
/// \param ForPoisonSrc true if initial vector is poison, false otherwise.
- InstructionCost getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc) const;
+ InstructionCost getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
+ Type *ScalarTy) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
@@ -2542,7 +2543,7 @@ class BoUpSLP {
/// \returns a vector from a collection of scalars in \p VL. if \p Root is not
/// specified, the starting vector value is poison.
- Value *gather(ArrayRef<Value *> VL, Value *Root);
+ Value *gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy);
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
@@ -7760,6 +7761,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
bool IsFinalized = false;
SmallVector<int> CommonMask;
SmallVector<PointerUnion<Value *, const TreeEntry *>, 2> InVectors;
+ Type *ScalarTy = nullptr;
const TargetTransformInfo &TTI;
InstructionCost Cost = 0;
SmallDenseSet<Value *> VectorizedVals;
@@ -7789,13 +7791,13 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
if ((!Root && allConstant(VL)) || all_of(VL, IsaPred<UndefValue>))
return TTI::TCC_Free;
- auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
+ auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
InstructionCost GatherCost = 0;
SmallVector<Value *> Gathers(VL.begin(), VL.end());
// Improve gather cost for gather of loads, if we can group some of the
// loads into vector loads.
InstructionsState S = getSameOpcode(VL, *R.TLI);
- const unsigned Sz = R.DL->getTypeSizeInBits(VL.front()->getType());
+ const unsigned Sz = R.DL->getTypeSizeInBits(ScalarTy);
unsigned MinVF = R.getMinVF(2 * Sz);
if (VL.size() > 2 &&
((S.getOpcode() == Instruction::Load && !S.isAltShuffle()) ||
@@ -7809,7 +7811,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}))) &&
!all_of(Gathers, [&](Value *V) { return R.getTreeEntry(V); }) &&
!isSplat(Gathers)) {
- InstructionCost BaseCost = R.getGatherCost(Gathers, !Root);
+ InstructionCost BaseCost = R.getGatherCost(Gathers, !Root, ScalarTy);
SetVector<Value *> VectorizedLoads;
SmallVector<std::pair<unsigned, LoadsState>> VectorizedStarts;
SmallVector<unsigned> ScatterVectorized;
@@ -7936,7 +7938,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
VecTy, Mask, CostKind);
}
} else {
- GatherCost += R.getGatherCost(PointerOps, /*ForPoisonSrc=*/true);
+ GatherCost += R.getGatherCost(PointerOps, /*ForPoisonSrc=*/true,
+ PointerOps.front()->getType());
}
}
if (NeedInsertSubvectorAnalysis) {
@@ -7970,18 +7973,19 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
transform(VL, ShuffleMask.begin(), [](Value *V) {
return isa<PoisonValue>(V) ? PoisonMaskElem : 0;
});
- InstructionCost InsertCost = TTI.getVectorInstrCost(
- Instruction::InsertElement, VecTy, CostKind, 0,
- PoisonValue::get(VecTy), *It);
- return InsertCost +
- TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
- ShuffleMask, CostKind, /*Index=*/0,
- /*SubTp=*/nullptr, /*Args=*/*It);
+ InstructionCost InsertCost =
+ TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind, 0,
+ PoisonValue::get(VecTy), *It);
+ return InsertCost + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
+ VecTy, ShuffleMask, CostKind,
+ /*Index=*/0, /*SubTp=*/nullptr,
+ /*Args=*/*It);
}
return GatherCost +
(all_of(Gathers, IsaPred<UndefValue>)
? TTI::TCC_Free
- : R.getGatherCost(Gathers, !Root && VL.equals(Gathers)));
+ : R.getGatherCost(Gathers, !Root && VL.equals(Gathers),
+ ScalarTy));
};
/// Compute the cost of creating a vector containing the extracted values from
@@ -8001,8 +8005,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return Sz;
return std::max(Sz, VecTy->getNumElements());
});
- unsigned NumSrcRegs = TTI.getNumberOfParts(
- FixedVectorType::get(VL.front()->getType(), NumElts));
+ unsigned NumSrcRegs =
+ TTI.getNumberOfParts(FixedVectorType::get(ScalarTy, NumElts));
if (NumSrcRegs == 0)
NumSrcRegs = 1;
// FIXME: this must be moved to TTI for better estimation.
@@ -8048,17 +8052,16 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
std::optional<TTI::ShuffleKind> RegShuffleKind =
CheckPerRegistersShuffle(SubMask);
if (!RegShuffleKind) {
- Cost += ::getShuffleCost(
- TTI, *ShuffleKinds[Part],
- FixedVectorType::get(VL.front()->getType(), NumElts), MaskSlice);
+ Cost += ::getShuffleCost(TTI, *ShuffleKinds[Part],
+ FixedVectorType::get(ScalarTy, NumElts),
+ MaskSlice);
continue;
}
if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
!ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
- Cost += ::getShuffleCost(
- TTI, *RegShuffleKind,
- FixedVectorType::get(VL.front()->getType(), EltsPerVector),
- SubMask);
+ Cost += ::getShuffleCost(TTI, *RegShuffleKind,
+ FixedVectorType::get(ScalarTy, EltsPerVector),
+ SubMask);
}
}
return Cost;
@@ -8175,6 +8178,48 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
SmallVector<int> CommonMask(Mask.begin(), Mask.end());
Value *V1 = P1.dyn_cast<Value *>(), *V2 = P2.dyn_cast<Value *>();
unsigned CommonVF = Mask.size();
+ InstructionCost ExtraCost = 0;
+ auto GetNodeMinBWAffectedCost = [&](const TreeEntry &E,
+ unsigned VF) -> InstructionCost {
+ if (E.State == TreeEntry::NeedToGather && allConstant(E.Scalars))
+ return TTI::TCC_Free;
+ Type *EScalarTy = E.Scalars.front()->getType();
+ bool IsSigned = true;
+ if (auto It = R.MinBWs.find(&E); It != R.MinBWs.end()) {
+ EScalarTy = IntegerType::get(EScalarTy->getContext(), It->second.first);
+ IsSigned = It->second.second;
+ }
+ if (EScalarTy != ScalarTy) {
+ unsigned CastOpcode = Instruction::Trunc;
+ unsigned DstSz = R.DL->getTypeSizeInBits(ScalarTy);
+ unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
+ if (DstSz > SrcSz)
+ CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
+ return TTI.getCastInstrCost(
+ CastOpcode, FixedVectorType::get(ScalarTy, VF),
+ FixedVectorType::get(EScalarTy, VF), TTI::CastContextHint::None,
+ TTI::TCK_RecipThroughput);
+ }
+ return TTI::TCC_Free;
+ };
+ auto GetValueMinBWAffectedCost = [&](const Value *V) -> InstructionCost {
+ if (isa<Constant>(V))
+ return TTI::TCC_Free;
+ auto *VecTy = cast<VectorType>(V->getType());
+ Type *EScalarTy = VecTy->getElementType();
+ if (EScalarTy != ScalarTy) {
+ bool IsSigned = !isKnownNonNegative(V, SimplifyQuery(*R.DL));
+ unsigned CastOpcode = Instruction::Trunc;
+ unsigned DstSz = R.DL->getTypeSizeInBits(ScalarTy);
+ unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
+ if (DstSz > SrcSz)
+ CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
+ return TTI.getCastInstrCost(
+ CastOpcode, VectorType::get(ScalarTy, VecTy->getElementCount()),
+ VecTy, TTI::CastContextHint::None, TTI::TCK_RecipThroughput);
+ }
+ return TTI::TCC_Free;
+ };
if (!V1 && !V2 && !P2.isNull()) {
// Shuffle 2 entry nodes.
const TreeEntry *E = P1.get<const TreeEntry *>();
@@ -8201,11 +8246,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
}
CommonVF = E->Scalars.size();
+ ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF) +
+ GetNodeMinBWAffectedCost(*E2, CommonVF);
+ } else {
+ ExtraCost += GetNodeMinBWAffectedCost(*E, E->getVectorFactor()) +
+ GetNodeMinBWAffectedCost(*E2, E2->getVectorFactor());
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL, FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
} else if (!V1 && P2.isNull()) {
// Shuffle single entry node.
const TreeEntry *E = P1.get<const TreeEntry *>();
@@ -8224,8 +8272,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
CommonVF = E->Scalars.size();
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
+ ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF);
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
// Not identity/broadcast? Try to see if the original vector is better.
if (!E->ReorderIndices.empty() && CommonVF == E->ReorderIndices.size() &&
CommonVF == CommonMask.size() &&
@@ -8242,6 +8290,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
} else if (V1 && P2.isNull()) {
// Shuffle single vector.
+ ExtraCost += GetValueMinBWAffectedCost(V1);
CommonVF = cast<FixedVectorType>(V1->getType())->getNumElements();
assert(
all_of(Mask,
@@ -8268,11 +8317,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
CommonVF = VF;
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL,
- FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
+ ExtraCost += GetValueMinBWAffectedCost(V1);
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ ExtraCost += GetNodeMinBWAffectedCost(
+ *E2, std::min(CommonVF, E2->getVectorFactor()));
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
} else if (!V1 && V2) {
// Shuffle vector and tree node.
unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
@@ -8296,11 +8345,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
CommonVF = VF;
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL,
- FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
+ ExtraCost += GetNodeMinBWAffectedCost(
+ *E1, std::min(CommonVF, E1->getVectorFactor()));
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ ExtraCost += GetValueMinBWAffectedCost(V2);
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
} else {
assert(V1 && V2 && "Expected both vectors.");
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
@@ -8311,30 +8360,33 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return Idx < 2 * static_cast<int>(CommonVF);
}) &&
"All elements in mask must be less than 2 * CommonVF.");
+ ExtraCost +=
+ GetValueMinBWAffectedCost(V1) + GetValueMinBWAffectedCost(V2);
if (V1->getType() != V2->getType()) {
- V1 = Constant::getNullValue(FixedVectorType::get(
- cast<FixedVectorType>(V1->getType())->getElementType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL, FixedVectorType::get(
- cast<FixedVectorType>(V1->getType())->getElementType(),
- CommonVF));
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+ } else {
+ if (cast<VectorType>(V1->getType())->getElementType() != ScalarTy)
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ if (cast<VectorType>(V2->getType())->getElementType() != ScalarTy)
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
}
}
- InVectors.front() = Constant::getNullValue(FixedVectorType::get(
- cast<FixedVectorType>(V1->getType())->getElementType(),
- CommonMask.size()));
+ InVectors.front() = Constant::getNullValue(
+ FixedVectorType::get(ScalarTy, CommonMask.size()));
if (InVectors.size() == 2)
InVectors.pop_back();
- return BaseShuffleAnalysis::createShuffle<InstructionCost>(
- V1, V2, CommonMask, Builder);
+ return ExtraCost + BaseShuffleAnalysis::createShuffle<InstructionCost>(
+ V1, V2, CommonMask, Builder);
}
public:
- ShuffleCostEstimator(TargetTransformInfo &TTI,
+ ShuffleCostEstimator(Type *ScalarTy, TargetTransformInfo &TTI,
ArrayRef<Value *> VectorizedVals, BoUpSLP &R,
SmallPtrSetImpl<Value *> &CheckedExtracts)
- : TTI(TTI), VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()),
- R(R), CheckedExtracts(CheckedExtracts) {}
+ : ScalarTy(ScalarTy), TTI(TTI),
+ VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()), R(R),
+ CheckedExtracts(CheckedExtracts) {}
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
unsigned NumParts, bool &UseVecBaseAsInput) {
@@ -8424,7 +8476,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
if (NumParts != 1 && UniqueBases.size() != 1) {
UseVecBaseAsInput = true;
VecBase = Constant::getNullValue(
- FixedVectorType::get(VL.front()->getType(), CommonMask.size()));
+ FixedVectorType::get(ScalarTy, CommonMask.size()));
}
return VecBase;
}
@@ -8452,8 +8504,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
- auto *MaskVecTy =
- FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
+ auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
@@ -8470,8 +8521,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
- auto *MaskVecTy =
- FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
+ auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
@@ -8571,7 +8621,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return ConstantVector::getSplat(
ElementCount::getFixed(
cast<FixedVectorType>(Root->getType())->getNumElements()),
- getAllOnesValue(*R.DL, VL.front()->getType()));
+ getAllOnesValue(*R.DL, ScalarTy));
}
InstructionCost createFreeze(InstructionCost Cost) { return Cost; }
/// Finalize emission of the shuffles.
@@ -8717,7 +8767,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
return processBuildVector<ShuffleCostEstimator, InstructionCost>(
- E, *TTI, VectorizedVals, *this, CheckedExtracts);
+ E, ScalarTy, *TTI, VectorizedVals, *this, CheckedExtracts);
}
InstructionCost CommonCost = 0;
SmallVector<int> Mask;
@@ -10726,12 +10776,8 @@ BoUpSLP::isGatherShuffledEntry(
return Res;
}
-InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
- bool ForPoisonSrc) const {
- // Find the type of the operands in VL.
- Type *ScalarTy = VL[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
+InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
+ Type *ScalarTy) const {
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
bool DuplicateNonConst = false;
// Find the cost of inserting/extracting values from the vector.
@@ -10742,6 +10788,11 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost;
auto EstimateInsertCost = [&](unsigned I, Value *V) {
+ if (V->getType() != ScalarTy) {
+ Cost += TTI->getCastInstrCost(Instruction::Trunc, ScalarTy, V->getType(),
+ TTI::CastContextHint::None, CostKind);
+ V = nullptr;
+ }
if (!ForPoisonSrc)
Cost +=
TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
@@ -10966,7 +11017,7 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
-Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
+Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
// List of instructions/lanes from current block and/or the blocks which are
// part of the current loop. These instructions will be inserted at the end to
// make it possible to optimize loops and hoist invariant instructions out of
@@ -10992,14 +11043,11 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos,
Type *Ty) {
Value *Scalar = V;
- if (cast<VectorType>(Vec->getType())->getElementType() != Ty) {
- assert(V->getType()->isIntegerTy() && Ty->isIntegerTy() &&
+ if (Scalar->getType() !=...
[truncated]
|
Ping! |
1 similar comment
Ping! |
CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt; | ||
return TTI.getCastInstrCost( | ||
CastOpcode, VectorType::get(ScalarTy, VecTy->getElementCount()), | ||
VecTy, TTI::CastContextHint::None, TTI::TCK_RecipThroughput); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should CostKind be hard coded like this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will use CostKind here instead
Created using spr 1.3.5
Ping! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Metric: size..text
Program size..text
exp ref diff
test-suite :: MultiSource/Benchmarks/mediabench/gsm/toast/toast.test 42906.00 42986.00 0.2%
test-suite :: MultiSource/Benchmarks/MiBench/telecomm-gsm/telecomm-gsm.test 42909.00 42989.00 0.2%
test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 664581.00 664661.00 0.0%
test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 664581.00 664661.00 0.0%
Less is better.
Replaces
buildvector <p x in> + trunc <p x in> to <p x im>
sequences tobuildvector <p x im> of { trunc in to im }
scalars, which is free inmost cases, results in better code.