[SVE] Eliminate calls to default-false VectorType::get() from Vectorize
Reviewers: efriedma, c-rhodes, david-arm, fhahn

Reviewed By: david-arm

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80339
christetreault-llvm committed May 29, 2020
1 parent c710bb4 commit d2befc6
Showing 4 changed files with 48 additions and 43 deletions.
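Note for readers skimming the diff: every hunk below applies the same mechanical substitution, so a minimal standalone sketch of the pattern may help. This sketch is not part of the commit; the helper name makeWideTy and the driver in main are illustrative assumptions only.

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Illustrative helper (not from the patch): build the vector type for a
// scalar type and a vectorization factor VF.
static VectorType *makeWideTy(Type *ScalarTy, unsigned VF) {
  // Before: VectorType::get(ScalarTy, VF) silently relied on the default
  // Scalable = false argument, which is ambiguous once scalable (SVE)
  // vector types exist.
  // After: FixedVectorType::get states the fixed-width intent explicitly.
  return FixedVectorType::get(ScalarTy, VF);
}

int main() {
  LLVMContext Ctx;
  // Produces the fixed-width type <4 x i32>.
  VectorType *VecTy = makeWideTy(Type::getInt32Ty(Ctx), /*VF=*/4);
  (void)VecTy;
  return 0;
}
```

In the hunks themselves, declarations are also switched from `VectorType *`/`Type *` to `auto *` where the right-hand side now names the concrete fixed-width type.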
12 changes: 6 additions & 6 deletions llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1030,10 +1030,10 @@ bool Vectorizer::vectorizeStoreChain(
   VectorType *VecTy;
   VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
   if (VecStoreTy)
-    VecTy = VectorType::get(StoreTy->getScalarType(),
-                            Chain.size() * VecStoreTy->getNumElements());
+    VecTy = FixedVectorType::get(StoreTy->getScalarType(),
+                                 Chain.size() * VecStoreTy->getNumElements());
   else
-    VecTy = VectorType::get(StoreTy, Chain.size());
+    VecTy = FixedVectorType::get(StoreTy, Chain.size());
 
   // If it's more than the max vector size or the target has a better
   // vector factor, break it into two pieces.
@@ -1182,10 +1182,10 @@ bool Vectorizer::vectorizeLoadChain(
   VectorType *VecTy;
   VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
   if (VecLoadTy)
-    VecTy = VectorType::get(LoadTy->getScalarType(),
-                            Chain.size() * VecLoadTy->getNumElements());
+    VecTy = FixedVectorType::get(LoadTy->getScalarType(),
+                                 Chain.size() * VecLoadTy->getNumElements());
   else
-    VecTy = VectorType::get(LoadTy, Chain.size());
+    VecTy = FixedVectorType::get(LoadTy, Chain.size());
 
   // If it's more than the max vector size or the target has a better
   // vector factor, break it into two pieces.
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -767,7 +767,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         // supported on the target.
         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
           // Arbitrarily try a vector of 2 elements.
-          Type *VecTy = VectorType::get(T, /*NumElements=*/2);
+          auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
           assert(VecTy && "did not find vectorized version of stored type");
           if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
             reportVectorizationFailure(
@@ -782,7 +782,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
        if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
          // For nontemporal loads, check that a nontemporal vector version is
          // supported on the target (arbitrarily try a vector of 2 elements).
-          Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
+          auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
          assert(VecTy && "did not find vectorized version of load type");
          if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
            reportVectorizationFailure(
51 changes: 28 additions & 23 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -311,7 +311,7 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) {
   // Determine if an array of VF elements of type Ty is "bitcast compatible"
   // with a <VF x Ty> vector.
   if (VF > 1) {
-    auto *VectorTy = VectorType::get(Ty, VF);
+    auto *VectorTy = FixedVectorType::get(Ty, VF);
     return VF * DL.getTypeAllocSize(Ty) != DL.getTypeStoreSize(VectorTy);
   }
 
@@ -2074,7 +2074,7 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
       VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
     } else {
       // Initialize packing with insertelements to start from undef.
-      Value *Undef = UndefValue::get(VectorType::get(V->getType(), VF));
+      Value *Undef = UndefValue::get(FixedVectorType::get(V->getType(), VF));
       VectorLoopValueMap.setVectorValue(V, Part, Undef);
       for (unsigned Lane = 0; Lane < VF; ++Lane)
         packScalarIntoVectorValue(V, {Part, Lane});
@@ -2196,7 +2196,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
   // Prepare for the vector type of the interleaved load/store.
   Type *ScalarTy = getMemInstValueType(Instr);
   unsigned InterleaveFactor = Group->getFactor();
-  Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
+  auto *VecTy = FixedVectorType::get(ScalarTy, InterleaveFactor * VF);
 
   // Prepare for the new pointers.
   SmallVector<Value *, 2> AddrParts;
@@ -2300,7 +2300,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
 
         // If this member has different type, cast the result type.
         if (Member->getType() != ScalarTy) {
-          VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
+          VectorType *OtherVTy = FixedVectorType::get(Member->getType(), VF);
           StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
         }
 
@@ -2314,7 +2314,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
   }
 
   // The sub vector type for current instruction.
-  VectorType *SubVT = VectorType::get(ScalarTy, VF);
+  auto *SubVT = FixedVectorType::get(ScalarTy, VF);
 
   // Vectorize the interleaved store group.
   for (unsigned Part = 0; Part < UF; Part++) {
@@ -2385,7 +2385,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
          "CM decision is not to widen the memory instruction");
 
   Type *ScalarDataTy = getMemInstValueType(Instr);
-  Type *DataTy = VectorType::get(ScalarDataTy, VF);
+  auto *DataTy = FixedVectorType::get(ScalarDataTy, VF);
   const Align Alignment = getLoadStoreAlignment(Instr);
 
   // Determine if the pointer operand of the access is either consecutive or
@@ -2688,7 +2688,7 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
          "Only one type should be a floating point type");
   Type *IntTy =
       IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
-  VectorType *VecIntTy = VectorType::get(IntTy, VF);
+  auto *VecIntTy = FixedVectorType::get(IntTy, VF);
   Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
   return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
 }
@@ -3359,7 +3359,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
       Type *OriginalTy = I->getType();
       Type *ScalarTruncatedTy =
           IntegerType::get(OriginalTy->getContext(), KV.second);
-      Type *TruncatedTy = VectorType::get(
+      auto *TruncatedTy = FixedVectorType::get(
           ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getNumElements());
       if (TruncatedTy == OriginalTy)
         continue;
@@ -3413,11 +3413,13 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
         auto Elements0 =
             cast<VectorType>(SI->getOperand(0)->getType())->getNumElements();
         auto *O0 = B.CreateZExtOrTrunc(
-            SI->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements0));
+            SI->getOperand(0),
+            FixedVectorType::get(ScalarTruncatedTy, Elements0));
         auto Elements1 =
             cast<VectorType>(SI->getOperand(1)->getType())->getNumElements();
         auto *O1 = B.CreateZExtOrTrunc(
-            SI->getOperand(1), VectorType::get(ScalarTruncatedTy, Elements1));
+            SI->getOperand(1),
+            FixedVectorType::get(ScalarTruncatedTy, Elements1));
 
         NewI = B.CreateShuffleVector(O0, O1, SI->getShuffleMask());
       } else if (isa<LoadInst>(I) || isa<PHINode>(I)) {
@@ -3427,14 +3429,16 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
         auto Elements =
             cast<VectorType>(IE->getOperand(0)->getType())->getNumElements();
         auto *O0 = B.CreateZExtOrTrunc(
-            IE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
+            IE->getOperand(0),
+            FixedVectorType::get(ScalarTruncatedTy, Elements));
         auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);
         NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));
       } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
         auto Elements =
             cast<VectorType>(EE->getOperand(0)->getType())->getNumElements();
         auto *O0 = B.CreateZExtOrTrunc(
-            EE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
+            EE->getOperand(0),
+            FixedVectorType::get(ScalarTruncatedTy, Elements));
         NewI = B.CreateExtractElement(O0, EE->getOperand(2));
       } else {
         // If we don't know what to do, be conservative and don't do anything.
@@ -3598,8 +3602,8 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
   if (VF > 1) {
     Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
     VectorInit = Builder.CreateInsertElement(
-        UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit,
-        Builder.getInt32(VF - 1), "vector.recur.init");
+        UndefValue::get(FixedVectorType::get(VectorInit->getType(), VF)),
+        VectorInit, Builder.getInt32(VF - 1), "vector.recur.init");
   }
 
   // We constructed a temporary phi node in the first phase of vectorization.
@@ -3821,7 +3825,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
   // then extend the loop exit value to enable InstCombine to evaluate the
   // entire expression in the smaller type.
   if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
-    Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
+    Type *RdxVecTy = FixedVectorType::get(RdxDesc.getRecurrenceType(), VF);
     Builder.SetInsertPoint(
         LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator());
     VectorParts RdxParts(UF);
@@ -4148,7 +4152,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
     // Create a vector phi with no operands - the vector phi operands will be
     // set at the end of vector code generation.
     Type *VecTy =
-        (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
+        (VF == 1) ? PN->getType() : FixedVectorType::get(PN->getType(), VF);
     Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
     VectorLoopValueMap.setVectorValue(P, 0, VecPhi);
     OrigPHIsToFix.push_back(P);
@@ -4167,7 +4171,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
       for (unsigned Part = 0; Part < UF; ++Part) {
         // This is phase one of vectorizing PHIs.
         Type *VecTy =
-            (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
+            (VF == 1) ? PN->getType() : FixedVectorType::get(PN->getType(), VF);
         Value *EntryPart = PHINode::Create(
             VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
         VectorLoopValueMap.setVectorValue(P, Part, EntryPart);
@@ -4327,7 +4331,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
 
     /// Vectorize casts.
     Type *DestTy =
-        (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
+        (VF == 1) ? CI->getType() : FixedVectorType::get(CI->getType(), VF);
 
     for (unsigned Part = 0; Part < UF; ++Part) {
       Value *A = State.get(User.getOperand(0), Part);
@@ -4387,7 +4391,8 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
       // Use vector version of the intrinsic.
       Type *TysForDecl[] = {CI->getType()};
       if (VF > 1)
-        TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+        TysForDecl[0] =
+            FixedVectorType::get(CI->getType()->getScalarType(), VF);
       VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
       assert(VectorF && "Can't retrieve vector intrinsic.");
     } else {
@@ -5947,7 +5952,7 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
   assert(Group && "Fail to get an interleaved access group.");
 
   unsigned InterleaveFactor = Group->getFactor();
-  VectorType *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
+  auto *WideVecTy = FixedVectorType::get(ValTy, VF * InterleaveFactor);
 
   // Holds the indices of existing members in an interleaved load group.
   // An interleaved store group doesn't need this as it doesn't allow gaps.
@@ -6349,7 +6354,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
     Type *CondTy = SI->getCondition()->getType();
     if (!ScalarCond)
-      CondTy = VectorType::get(CondTy, VF);
+      CondTy = FixedVectorType::get(CondTy, VF);
 
     return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
                                   CostKind, I);
@@ -7510,8 +7515,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   if (AlsoPack && State.VF > 1) {
     // If we're constructing lane 0, initialize to start from undef.
     if (State.Instance->Lane == 0) {
-      Value *Undef =
-          UndefValue::get(VectorType::get(Ingredient->getType(), State.VF));
+      Value *Undef = UndefValue::get(
+          FixedVectorType::get(Ingredient->getType(), State.VF));
       State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef);
     }
     State.ILV->packScalarIntoVectorValue(Ingredient, *State.Instance);
24 changes: 12 additions & 12 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3165,7 +3165,7 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
 
   if (!isValidElementType(EltTy))
     return 0;
-  uint64_t VTSize = DL.getTypeStoreSizeInBits(VectorType::get(EltTy, N));
+  uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
   if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T))
     return 0;
   return N;
@@ -3265,7 +3265,7 @@ getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI,
   SmallVector<Type *, 4> VecTys;
   for (Use &Arg : CI->args())
     VecTys.push_back(
-        VectorType::get(Arg->getType(), VecTy->getNumElements()));
+        FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
 
   // If the corresponding vector call is cheaper, return its cost.
   LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
@@ -3425,7 +3425,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       // Calculate the cost of this instruction.
       int ScalarCost = VL.size() * ScalarEltCost;
 
-      VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+      auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
       int VecCost = 0;
       // Check if the values are candidates to demote.
      if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
@@ -3445,7 +3445,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
-      VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
+      auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
       int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
       int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
                                             CostKind, VL0);
@@ -3633,8 +3633,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       } else {
         Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
         Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
-        VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size());
-        VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size());
+        auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
+        auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
         VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
                                         CostKind);
         VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
@@ -3807,7 +3807,7 @@ int BoUpSLP::getSpillCost() const {
     if (NumCalls) {
       SmallVector<Type*, 4> V;
       for (auto *II : LiveValues)
-        V.push_back(VectorType::get(II->getType(), BundleWidth));
+        V.push_back(FixedVectorType::get(II->getType(), BundleWidth));
       Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
     }
 
@@ -4100,7 +4100,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
     else
       VL = UniqueValues;
   }
-  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
 
   Value *V = Gather(VL, VecTy);
   if (!ReuseShuffleIndicies.empty()) {
@@ -4135,7 +4135,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
   Type *ScalarTy = VL0->getType();
   if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
     ScalarTy = SI->getValueOperand()->getType();
-  VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
+  auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
 
   bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
 
@@ -4532,7 +4532,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       }
 
       Module *M = F->getParent();
-      Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
+      Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
       Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
 
       if (!UseIntrinsic) {
@@ -4660,7 +4660,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
     Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
     auto BundleWidth = VectorizableTree[0]->Scalars.size();
     auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
-    auto *VecTy = VectorType::get(MinTy, BundleWidth);
+    auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
     auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
     VectorizableTree[0]->VectorizedValue = Trunc;
   }
@@ -5988,7 +5988,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
     // No actual vectorization should happen, if number of parts is the same as
     // provided vectorization factor (i.e. the scalar type is used for vector
     // code during codegen).
-    auto *VecTy = VectorType::get(VL[0]->getType(), VF);
+    auto *VecTy = FixedVectorType::get(VL[0]->getType(), VF);
     if (TTI->getNumberOfParts(VecTy) == VF)
       continue;
     for (unsigned I = NextInst; I < MaxInst; ++I) {
