-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP]Emit actual bitwidth for analyzed MinBitwidth nodes, NFCI. #71536
[SLP]Emit actual bitwidth for analyzed MinBitwidth nodes, NFCI. #71536
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Alexey Bataev (alexey-bataev) Changes: SLP includes analysis for the minimum bitwidth, so the actual integer operations can be emitted. […] Patch is 33.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71536.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bb233ed7d6c77ce..8fb679c6e2609fc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7892,6 +7892,30 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
continue;
UsedScalars.set(I);
}
+ auto GetCastGontextHint = [&](Value *V) {
+ TTI::CastContextHint CCH = TTI::CastContextHint::None;
+ if (const TreeEntry *OpTE = getTreeEntry(V)) {
+ if (OpTE->State == TreeEntry::ScatterVectorize) {
+ CCH = TTI::CastContextHint::GatherScatter;
+ } else if (OpTE->State == TreeEntry::Vectorize &&
+ OpTE->getOpcode() == Instruction::Load &&
+ !OpTE->isAltShuffle()) {
+ if (OpTE->ReorderIndices.empty()) {
+ CCH = TTI::CastContextHint::Normal;
+ } else {
+ SmallVector<int> Mask;
+ inversePermutation(OpTE->ReorderIndices, Mask);
+ if (ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
+ CCH = TTI::CastContextHint::Reversed;
+ }
+ }
+ } else {
+ InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI);
+ if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle())
+ CCH = TTI::CastContextHint::GatherScatter;
+ }
+ return CCH;
+ };
auto GetCostDiff =
[=](function_ref<InstructionCost(unsigned)> ScalarEltCost,
function_ref<InstructionCost(InstructionCost)> VectorCost) {
@@ -7911,6 +7935,42 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
}
InstructionCost VecCost = VectorCost(CommonCost);
+ // Check if the current node must be resized, if the parent node is not
+ // resized.
+ if (!UnaryInstruction::isCast(E->getOpcode()) && E->Idx != 0 &&
+ (E->UserTreeIndices.front().UserTE->getOpcode() !=
+ Instruction::Select ||
+ E->UserTreeIndices.front().EdgeIdx != 0) &&
+ It != MinBWs.end()) {
+ auto UserBWIt =
+ MinBWs.find(E->UserTreeIndices.front().UserTE->Scalars.front());
+ Type *UserScalarTy =
+ E->UserTreeIndices.front()
+ .UserTE->getOperand(E->UserTreeIndices.front().EdgeIdx)
+ .front()
+ ->getType();
+ if (UserBWIt != MinBWs.end())
+ UserScalarTy = IntegerType::get(ScalarTy->getContext(),
+ UserBWIt->second.first);
+ if (ScalarTy != UserScalarTy) {
+ unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
+ unsigned SrcBWSz = DL->getTypeSizeInBits(UserScalarTy);
+ unsigned VecOpcode;
+ auto *SrcVecTy =
+ FixedVectorType::get(UserScalarTy, E->getVectorFactor());
+ if (BWSz > SrcBWSz)
+ VecOpcode = Instruction::Trunc;
+ else
+ VecOpcode =
+ It->second.second ? Instruction::SExt : Instruction::ZExt;
+ TTI::CastContextHint CCH = GetCastGontextHint(VL0);
+ VecCost += TTI->getCastInstrCost(VecOpcode, VecTy, SrcVecTy, CCH,
+ CostKind);
+ ScalarCost +=
+ Sz * TTI->getCastInstrCost(VecOpcode, ScalarTy, UserScalarTy,
+ CCH, CostKind);
+ }
+ }
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost - CommonCost,
ScalarCost, "Calculated costs for Tree"));
return VecCost - ScalarCost;
@@ -8182,6 +8242,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Type *SrcScalarTy = VL0->getOperand(0)->getType();
auto *SrcVecTy = FixedVectorType::get(SrcScalarTy, VL.size());
unsigned Opcode = ShuffleOrOp;
+ unsigned VecOpcode = Opcode;
if (!ScalarTy->isFloatingPointTy() && !SrcScalarTy->isFloatingPointTy() &&
(SrcIt != MinBWs.end() || It != MinBWs.end())) {
// Check if the values are candidates to demote.
@@ -8193,46 +8254,36 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
}
unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
if (BWSz == SrcBWSz) {
- Opcode = Instruction::BitCast;
+ VecOpcode = Instruction::BitCast;
} else if (BWSz < SrcBWSz) {
- Opcode = Instruction::Trunc;
+ VecOpcode = Instruction::Trunc;
} else if (It != MinBWs.end()) {
assert(BWSz > SrcBWSz && "Invalid cast!");
- Opcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
+ VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
}
}
- auto GetScalarCost = [&](unsigned Idx) {
+ auto GetScalarCost = [&](unsigned Idx) -> InstructionCost {
+ // Do not count cost here if minimum bitwidth is in effect and it is just
+ // a bitcast (here it is just a noop).
+ if (VecOpcode != Opcode && VecOpcode == Instruction::BitCast)
+ return TTI::TCC_Free;
auto *VI = VL0->getOpcode() == Opcode
? cast<Instruction>(UniqueValues[Idx])
: nullptr;
- return TTI->getCastInstrCost(Opcode, ScalarTy, SrcScalarTy,
+ return TTI->getCastInstrCost(Opcode, VL0->getType(),
+ VL0->getOperand(0)->getType(),
TTI::getCastContextHint(VI), CostKind, VI);
};
- TTI::CastContextHint CCH = TTI::CastContextHint::None;
- if (const TreeEntry *OpTE = getTreeEntry(VL0->getOperand(0))) {
- if (OpTE->State == TreeEntry::ScatterVectorize) {
- CCH = TTI::CastContextHint::GatherScatter;
- } else if (OpTE->State == TreeEntry::Vectorize &&
- OpTE->getOpcode() == Instruction::Load &&
- !OpTE->isAltShuffle()) {
- if (OpTE->ReorderIndices.empty()) {
- CCH = TTI::CastContextHint::Normal;
- } else {
- SmallVector<int> Mask;
- inversePermutation(OpTE->ReorderIndices, Mask);
- if (ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
- CCH = TTI::CastContextHint::Reversed;
- }
- }
- } else {
- InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI);
- if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle())
- CCH = TTI::CastContextHint::GatherScatter;
- }
auto GetVectorCost = [=](InstructionCost CommonCost) {
+ // Do not count cost here if minimum bitwidth is in effect and it is just
+ // a bitcast (here it is just a noop).
+ if (VecOpcode != Opcode && VecOpcode == Instruction::BitCast)
+ return CommonCost;
auto *VI = VL0->getOpcode() == Opcode ? VL0 : nullptr;
+ TTI::CastContextHint CCH = GetCastGontextHint(VL0->getOperand(0));
return CommonCost +
- TTI->getCastInstrCost(Opcode, VecTy, SrcVecTy, CCH, CostKind, VI);
+ TTI->getCastInstrCost(VecOpcode, VecTy, SrcVecTy, CCH, CostKind,
+ VecOpcode == Opcode ? VI : nullptr);
};
return GetCostDiff(GetScalarCost, GetVectorCost);
}
@@ -8966,6 +9017,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
SmallVector<APInt> DemandedElts;
SmallDenseSet<Value *, 4> UsedInserts;
+ DenseSet<Value *> VectorCasts;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
@@ -9034,6 +9086,28 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
FirstUsers.emplace_back(VU, ScalarTE);
DemandedElts.push_back(APInt::getZero(FTy->getNumElements()));
VecId = FirstUsers.size() - 1;
+ // Account for the cast between the minimum-bitwidth vector and the vector
+ // type FTy used by the external insertelement user. VectorCasts ensures the
+ // cast is counted only once per scalar.
+ auto It = MinBWs.find(EU.Scalar);
+ if (It != MinBWs.end() && VectorCasts.insert(EU.Scalar).second) {
+ // MinBWs maps to a (bit width, is-signed) pair: the width is `first`,
+ // `second` is only the signedness flag.
+ unsigned BWSz = It->second.first;
+ unsigned DstBWSz = DL->getTypeSizeInBits(FTy->getElementType());
+ unsigned VecOpcode;
+ // The cast goes from the BWSz-wide vector (source) to FTy (destination),
+ // so it is a truncation only if the destination is narrower.
+ if (DstBWSz < BWSz)
+ VecOpcode = Instruction::Trunc;
+ else
+ VecOpcode =
+ It->second.second ? Instruction::SExt : Instruction::ZExt;
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost C = TTI->getCastInstrCost(
+ VecOpcode, FTy,
+ FixedVectorType::get(
+ IntegerType::get(FTy->getContext(), It->second.first),
+ FTy->getNumElements()),
+ TTI::CastContextHint::None, CostKind);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for extending externally used vector with "
+ "non-equal minimum bitwidth.\n");
+ Cost += C;
+ }
} else {
if (isFirstInsertElement(VU, cast<InsertElementInst>(It->first)))
It->first = VU;
@@ -9069,6 +9143,21 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
CostKind, EU.Lane);
}
}
+ // Add reduced value cost, if resized: the cost of casting the root node's
+ // value from its MinBWs bit width to its original type.
+ if (!VectorizedVals.empty()) {
+ auto BWIt = MinBWs.find(VectorizableTree.front()->Scalars.front());
+ if (BWIt != MinBWs.end()) {
+ Type *DstTy = BWIt->first->getType();
+ unsigned OriginalSz = DL->getTypeSizeInBits(DstTy);
+ // The cast is SrcTy (minimum bit width) -> DstTy (original type): it is
+ // an extension when the original type is wider, a truncation otherwise.
+ unsigned Opcode = Instruction::Trunc;
+ if (OriginalSz > BWIt->second.first)
+ Opcode = BWIt->second.second ? Instruction::SExt : Instruction::ZExt;
+ Type *SrcTy = IntegerType::get(DstTy->getContext(), BWIt->second.first);
+ Cost += TTI->getCastInstrCost(Opcode, DstTy, SrcTy,
+ TTI::CastContextHint::None,
+ TTI::TCK_RecipThroughput);
+ }
+ }
InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
@@ -9274,6 +9363,11 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
continue;
+ auto It = MinBWs.find(VTE->Scalars.front());
+ // If vectorize node is demoted - do not match.
+ if (It != MinBWs.end() &&
+ It->second.first != DL->getTypeSizeInBits(V->getType()))
+ continue;
VToTEs.insert(VTE);
}
if (VToTEs.empty())
@@ -10830,7 +10924,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
return Vec;
}
- auto FinalShuffle = [&](Value *V, const TreeEntry *E) {
+ auto FinalShuffle = [&](Value *V, const TreeEntry *E, VectorType *VecTy,
+ bool IsSigned) {
+ if (V->getType() != VecTy)
+ V = Builder.CreateIntCast(V, VecTy, IsSigned);
ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
if (E->getOpcode() == Instruction::Store) {
ArrayRef<int> Mask =
@@ -10857,6 +10954,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
ScalarTy = Store->getValueOperand()->getType();
else if (auto *IE = dyn_cast<InsertElementInst>(VL0))
ScalarTy = IE->getOperand(1)->getType();
+ bool IsSigned = false;
+ auto It = MinBWs.find(E->Scalars.front());
+ if (It != MinBWs.end()) {
+ ScalarTy = IntegerType::get(F->getContext(), It->second.first);
+ IsSigned = It->second.second;
+ }
auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
switch (ShuffleOrOp) {
case Instruction::PHI: {
@@ -10880,7 +10983,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
PH->getParent()->getFirstInsertionPt());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
if (PostponedPHIs)
@@ -10913,6 +11016,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
Value *Vec = vectorizeOperand(E, i, /*PostponedPHIs=*/true);
+ if (VecTy != Vec->getType()) {
+ assert(It != MinBWs.end() && "Expected item in MinBWs.");
+ Vec = Builder.CreateIntCast(Vec, VecTy, It->second.second);
+ }
NewPhi->addIncoming(Vec, IBB);
}
@@ -10924,7 +11031,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
case Instruction::ExtractElement: {
Value *V = E->getSingleOperand(0);
setInsertPointAfterBundle(E);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
return V;
}
@@ -10934,7 +11041,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Value *Ptr = LI->getPointerOperand();
LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
Value *NewV = propagateMetadata(V, E->Scalars);
- NewV = FinalShuffle(NewV, E);
+ NewV = FinalShuffle(NewV, E, VecTy, IsSigned);
E->VectorizedValue = NewV;
return NewV;
}
@@ -10942,6 +11049,19 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique");
Builder.SetInsertPoint(cast<Instruction>(E->Scalars.back()));
Value *V = vectorizeOperand(E, 1, PostponedPHIs);
+ ArrayRef<Value *> Op = E->getOperand(1);
+ Type *ScalarTy = Op.front()->getType();
+ if (cast<VectorType>(V->getType())->getElementType() != ScalarTy) {
+ assert(ScalarTy->isIntegerTy() && "Expected item in MinBWs.");
+ std::pair<unsigned, bool> Res = MinBWs.lookup(Op.front());
+ assert(Res.first > 0 && "Expected item in MinBWs.");
+ V = Builder.CreateIntCast(
+ V,
+ FixedVectorType::get(
+ ScalarTy,
+ cast<FixedVectorType>(V->getType())->getNumElements()),
+ Res.second);
+ }
// Create InsertVector shuffle if necessary
auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
@@ -11107,7 +11227,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
auto *CI = cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -11127,11 +11247,22 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
+ if (L->getType() != R->getType()) {
+ assert(It != MinBWs.end() && "Expected item in MinBWs.");
+ if (L == R) {
+ R = L = Builder.CreateIntCast(L, VecTy, IsSigned);
+ } else {
+ L = Builder.CreateIntCast(L, VecTy, IsSigned);
+ R = Builder.CreateIntCast(R, VecTy, IsSigned);
+ }
+ }
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V = Builder.CreateCmp(P0, L, R);
propagateIRFlags(V, E->Scalars, VL0);
- V = FinalShuffle(V, E);
+ // Do not cast for cmps.
+ VecTy = cast<FixedVectorType>(V->getType());
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -11155,9 +11286,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
+ if (True->getType() != False->getType()) {
+ assert(It != MinBWs.end() && "Expected item in MinBWs.");
+ if (True == False) {
+ True = False = Builder.CreateIntCast(True, VecTy, IsSigned);
+ } else {
+ True = Builder.CreateIntCast(True, VecTy, IsSigned);
+ False = Builder.CreateIntCast(False, VecTy, IsSigned);
+ }
+ }
Value *V = Builder.CreateSelect(Cond, True, False);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -11179,7 +11319,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -11216,6 +11356,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
+ if (LHS->getType() != RHS->getType()) {
+ assert(It != MinBWs.end() && "Expected item in MinBWs.");
+ if (LHS == RHS) {
+ RHS = LHS = Builder.CreateIntCast(LHS, VecTy, IsSigned);
+ } else {
+ LHS = Builder.CreateIntCast(LHS, VecTy, IsSigned);
+ RHS = Builder.CreateIntCast(RHS, VecTy, IsSigned);
+ }
+ }
Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
@@ -11224,7 +11373,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -11270,7 +11419,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
}
Value *V = propagateMetadata(NewLI, E->Scalars);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
@@ -11281,7 +11430,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
setInsertPointAfterBundle(E);
Value *VecValue = vectorizeOperand(E, 0, PostponedPHIs);
- VecValue = FinalShuffle(VecValue, E);
+ VecValue = FinalShuffle(VecValue, E, VecTy, IsSigned);
Value *Ptr = SI->getPointerOperand();
StoreInst *ST =
@@ -11334,7 +11483,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
V = propagateMetadata(I, GEPs);
}
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -11414,7 +11563,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
}
propagateIRFlags(V, E->Scalars, VL0);
- V = FinalShuffle(V, E);
+ V = FinalShuffle(V, E, VecTy, IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -11446,6 +11595,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
+ if (LHS && RHS && LHS->getType() != RHS->getType()) {
+ assert(It != MinBWs.end() && "Expected item in MinBWs.");
+ if (LHS == RHS) {
+ RHS = LHS = Builder.CreateIntCast(LHS, VecTy, IsSigned);
+ } else {
+ LHS = Builder.CreateIntCast(LHS, VecTy, IsSigned);
+ RHS = Builder.CreateIntCast(RHS, VecTy, IsSigned);
+ }
+ }
Value *V0, *V1;
if (Instruction::isBinaryOp(E->getOpcode())) {
@@ -11496,6 +11654,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
CSEBlocks.insert(I->getParent());
}
+ if (V->getType() != VecTy && !isa<CmpInst>(VL0))
+ V = Builder.CreateIntCast(
+ V, FixedVectorType::get(ScalarTy, E->getVectorFactor()), IsSigned);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -11543,8 +11704,7 @@ Value *BoUpSLP::vectorizeTree(
Builder.SetInsertPoint(&F->getE...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
f5f0e47
to
61090e7
Compare
@@ -7892,6 +7892,30 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, | |||
continue; | |||
UsedScalars.set(I); | |||
} | |||
auto GetCastGontextHint = [&](Value *V) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GetCastGontextHint -> GetCastContextHint
TTI::CastContextHint CCH = TTI::CastContextHint::None; | ||
if (const TreeEntry *OpTE = getTreeEntry(V)) { | ||
if (OpTE->State == TreeEntry::ScatterVectorize) { | ||
CCH = TTI::CastContextHint::GatherScatter; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) just return the CCH enum and break up the if-else chains
// Check if the current node must be resized, if the parent node is not | ||
// resized. | ||
if (!UnaryInstruction::isCast(E->getOpcode()) && E->Idx != 0 && | ||
(E->UserTreeIndices.front().UserTE->getOpcode() != |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull out E->UserTreeIndices.front() ref? It's called a lot below.
61090e7
to
b6db3df
Compare
SLP includes analysis for the minimum bitwidth, so the actual integer operations can be emitted. This allows reducing register pressure and improving performance. Currently, it includes only the cost model, and the subsequent transformation relies on InstructionCombiner. It is better to do it directly in SLP, which allows reducing compile time and fixing cost model issues.
b6db3df
to
c4dd784
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
FTR: 5adfad2 |
@@ -9034,6 +9079,28 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) { | |||
FirstUsers.emplace_back(VU, ScalarTE); | |||
DemandedElts.push_back(APInt::getZero(FTy->getNumElements())); | |||
VecId = FirstUsers.size() - 1; | |||
auto It = MinBWs.find(EU.Scalar); | |||
if (It != MinBWs.end() && VectorCasts.insert(EU.Scalar).second) { | |||
unsigned BWSz = It->second.second; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It->second.first ? "second" is a boolean value.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, will fix it.
SLP includes analysis for the minimum bitwidth, so the actual integer
operations can be emitted. This allows reducing register pressure and
improving performance. Currently, it includes only the cost model, and the
subsequent transformation relies on InstructionCombiner. It is better to do
it directly in SLP, which allows reducing compile time and fixing cost model issues.