From 3b3a4c270bcbc12c957c941c8ee2a7bfea0fa673 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 10 Mar 2023 14:34:45 +0100 Subject: [PATCH] Revert "[SLP]Initial support for reshuffling of non-starting buildvector/gather nodes." This caused verifier errors: Instruction does not dominate all uses! %8 = insertelement <2 x i64> %7, i64 %pgocount1330, i64 1 %15 = shufflevector <2 x i64> %8, <2 x i64> poison, <2 x i32> in function ?NearestInclusiveAncestorAssignedToSlot@SlotScopedTraversal@blink@@SAPAVElement@2@ABV32@@Z (or register allocator crash when the verifier was disabled). See comment on the code review. > Previously only the very first gather/buildvector node might be probed for reshuffling of other nodes. > But the compiler may do the same for other gather/buildvector nodes too, just need to check the > dependency and postpone the emission of the dependent nodes, if the origin nodes were not emitted yet. > > Part of D110978 > > Differential Revision: https://reviews.llvm.org/D144958 This reverts commit a611b3f3059e4c3b9e7b914091c3edaef099fd5d. It also reverts 7a4061ae372b3262703ffeea3b64db89187db611 which depended on the above. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 198 +++++--------- ...ccelerate-vector-functions-inseltpoison.ll | 66 +++-- .../AArch64/accelerate-vector-functions.ll | 66 +++-- .../vectorize-free-extracts-inserts.ll | 235 +++++++++++----- .../AMDGPU/add_sub_sat-inseltpoison.ll | 12 +- .../SLPVectorizer/AMDGPU/add_sub_sat.ll | 12 +- .../AMDGPU/crash_extract_subvector_cost.ll | 14 +- .../AMDGPU/phi-result-use-order.ll | 75 ++++-- .../SLPVectorizer/X86/PR35865-inseltpoison.ll | 13 +- .../Transforms/SLPVectorizer/X86/PR35865.ll | 13 +- .../Transforms/SLPVectorizer/X86/PR39774.ll | 20 +- .../X86/alternate-calls-inseltpoison.ll | 18 +- .../SLPVectorizer/X86/alternate-calls.ll | 18 +- .../X86/alternate-cast-inseltpoison.ll | 6 +- .../SLPVectorizer/X86/alternate-cast.ll | 6 +- .../X86/alternate-fp-inseltpoison.ll | 2 +- .../SLPVectorizer/X86/alternate-fp.ll | 2 +- .../X86/alternate-int-inseltpoison.ll | 62 ++--- .../SLPVectorizer/X86/alternate-int.ll | 62 ++--- .../X86/arith-fp-inseltpoison.ll | 62 +++-- .../Transforms/SLPVectorizer/X86/arith-fp.ll | 62 +++-- .../X86/crash_exceed_scheduling.ll | 33 +-- .../SLPVectorizer/X86/crash_smallpt.ll | 136 ++++++---- .../X86/extract-shuffle-inseltpoison.ll | 9 +- .../SLPVectorizer/X86/extract-shuffle.ll | 9 +- .../Transforms/SLPVectorizer/X86/extract.ll | 13 +- .../X86/extractelement-multiple-uses.ll | 6 +- .../SLPVectorizer/X86/extractelement.ll | 24 +- .../gather-extractelements-different-bbs.ll | 33 +-- .../SLPVectorizer/X86/hadd-inseltpoison.ll | 8 +- .../test/Transforms/SLPVectorizer/X86/hadd.ll | 8 +- ...insert-element-build-vector-const-undef.ll | 23 +- ...nsert-element-build-vector-inseltpoison.ll | 48 ++-- .../X86/insert-element-build-vector.ll | 48 ++-- .../X86/insertelement-postpone.ll | 10 +- .../SLPVectorizer/X86/landing_pad.ll | 22 +- .../Transforms/SLPVectorizer/X86/lookahead.ll | 254 +++++++++--------- .../SLPVectorizer/X86/malformed_phis.ll | 85 +++--- .../Transforms/SLPVectorizer/X86/odd_store.ll | 22 +- .../SLPVectorizer/X86/pr47629-inseltpoison.ll | 135 +++++----- .../Transforms/SLPVectorizer/X86/pr47629.ll | 135 +++++----- .../Transforms/SLPVectorizer/X86/pr49081.ll | 2 +- .../SLPVectorizer/X86/reduction-logical.ll | 114 ++++---- .../SLPVectorizer/X86/reduction-same-vals.ll | 12 +- .../X86/remark_extract_broadcast.ll | 6 +- .../X86/reused-scalars-in-buildvector.ll | 30 ++- .../X86/scatter-vectorize-reused-pointer.ll | 2 +- .../X86/vect-gather-same-nodes.ll | 20 +- .../X86/vectorize-widest-phis.ll | 20 +- 49 files changed, 1293 insertions(+), 998 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 80e60ff0c8af1..20b0360ae1daf 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1182,7 +1182,6 @@ class BoUpSLP { MinBWs.clear(); InstrElementSize.clear(); UserIgnoreList = nullptr; - PostponedGathers.clear(); } unsigned getTreeSize() const { return VectorizableTree.size(); } @@ -2939,11 +2938,6 @@ class BoUpSLP { /// pre-gather them before. DenseMap EntryToLastInstruction; - /// List of gather nodes, depending on other gather/vector nodes, which should - /// be emitted after the vector instruction emission process to correctly - /// handle order of the vector instructions and shuffles. - SetVector PostponedGathers; - /// This POD struct describes one external user in the vectorized tree. struct ExternalUser { ExternalUser(Value *S, llvm::User *U, int L) @@ -6995,8 +6989,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, return TTI::TCC_Free; // Add broadcast for non-identity shuffle only. bool NeedShuffle = - count(VL, *It) > 1 && - (VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof)); + VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof); InstructionCost InsertCost = TTI->getVectorInstrCost( Instruction::InsertElement, VecTy, CostKind, NeedShuffle ? 0 : std::distance(VL.begin(), It), @@ -7006,7 +6999,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, TargetTransformInfo::SK_Broadcast, VecTy, /*Mask=*/std::nullopt, CostKind, /*Index=*/0, - /*SubTp=*/nullptr, /*Args=*/*It) + /*SubTp=*/nullptr, /*Args=*/VL[0]) : TTI::TCC_Free); } InstructionCost ReuseShuffleCost = 0; @@ -8403,7 +8396,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, // have a permutation of 2 input vectors. SmallVector> UsedTEs; DenseMap UsedValuesEntry; - for (Value *V : VL) { + for (Value *V : TE->Scalars) { if (isConstant(V)) continue; // Build a list of tree entries where V is used. @@ -8411,12 +8404,8 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, auto It = ValueToTEs.find(V); if (It != ValueToTEs.end()) VToTEs = It->second; - if (const TreeEntry *VTE = getTreeEntry(V)) { - Instruction &EntryUserInst = getLastInstructionInBundle(VTE); - if (&EntryUserInst == &UserInst || !CheckOrdering(&EntryUserInst)) - continue; + if (const TreeEntry *VTE = getTreeEntry(V)) VToTEs.insert(VTE); - } if (VToTEs.empty()) continue; if (UsedTEs.empty()) { @@ -8472,7 +8461,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, auto *It = find_if(FirstEntries, [=](const TreeEntry *EntryPtr) { return EntryPtr->isSame(VL) || EntryPtr->isSame(TE->Scalars); }); - if (It != FirstEntries.end() && (*It)->getVectorFactor() == VL.size()) { + if (It != FirstEntries.end()) { Entries.push_back(*It); std::iota(Mask.begin(), Mask.end(), 0); // Clear undef scalars. @@ -8514,18 +8503,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, break; } } - // No 2 source vectors with the same vector factor - just choose 2 with max - // index. - if (Entries.empty()) { - Entries.push_back( - *std::max_element(UsedTEs.front().begin(), UsedTEs.front().end(), - [](const TreeEntry *TE1, const TreeEntry *TE2) { - return TE1->Idx < TE2->Idx; - })); - Entries.push_back(SecondEntries.front()); - VF = std::max(Entries.front()->getVectorFactor(), - Entries.back()->getVectorFactor()); - } + // No 2 source vectors with the same vector factor - give up and do regular + // gather. + if (Entries.empty()) + return std::nullopt; } bool IsSplatOrUndefs = isSplat(VL) || all_of(VL, UndefValue::classof); @@ -9396,7 +9377,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { }; if (E->State == TreeEntry::NeedToGather) { - if (E->getMainOp() && E->Idx == 0) + if (E->Idx > 0) { + // We are in the middle of a vectorizable chain. We need to gather the + // scalars from the users. + Value *Vec = createBuildVector(E); + E->VectorizedValue = Vec; + return Vec; + } + if (E->getMainOp()) setInsertPointAfterBundle(E); unsigned VF = E->getVectorFactor(); auto AdjustExtracts = [&](const TreeEntry *E, ArrayRef Mask) { @@ -9407,16 +9395,20 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { continue; auto *EI = cast(E->Scalars[I]); VecBase = EI->getVectorOperand(); - // TODO: EI can be erased, if all its users are vectorized. But need to - // emit shuffles for such extractelement instructions. + // If all users are vectorized - can delete the extractelement + // itself. + if (any_of(EI->users(), + [&](User *U) { return !ScalarToTreeEntry.count(U); })) + continue; + eraseInstruction(EI); } return VecBase; }; auto CreateShuffle = [&](Value *V1, Value *V2, ArrayRef Mask) { - unsigned VF1 = cast(V1->getType())->getNumElements(); - unsigned VF2 = cast(V2->getType())->getNumElements(); - unsigned VF = std::max(VF1, VF2); - if (VF1 != VF2) { + if (V1->getType() != V2->getType()) { + unsigned VF1 = cast(V1->getType())->getNumElements(); + unsigned VF2 = cast(V2->getType())->getNumElements(); + unsigned VF = std::max(VF1, VF2); SmallVector ExtMask(VF, UndefMaskElem); std::iota(ExtMask.begin(), std::next(ExtMask.begin(), std::min(VF1, VF2)), 0); @@ -9434,39 +9426,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } } } - const int Limit = Mask.size() * 2; - if (V1 == V2 && Mask.size() == VF && - all_of(Mask, [=](int Idx) { return Idx < Limit; }) && - (ShuffleVectorInst::isIdentityMask(Mask) || - (ShuffleVectorInst::isZeroEltSplatMask(Mask) && - isa(V1) && - cast(V1)->getShuffleMask() == Mask))) - return V1; - Value *Vec = V1 == V2 ? Builder.CreateShuffleVector(V1, Mask) - : Builder.CreateShuffleVector(V1, V2, Mask); + Value *Vec = Builder.CreateShuffleVector(V1, V2, Mask); if (auto *I = dyn_cast(Vec)) { GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } return Vec; }; - auto NeedToDelay = [=](const TreeEntry *E, - ArrayRef Deps) -> Value * { - // No need to delay emission if all deps are ready. - if (all_of(Deps, [](const TreeEntry *TE) { return TE->VectorizedValue; })) - return nullptr; - // Postpone gather emission, will be emitted after the end of the - // process to keep correct order. - auto *VecTy = FixedVectorType::get(E->Scalars.front()->getType(), - E->getVectorFactor()); - Value *Vec = Builder.CreateAlignedLoad( - VecTy, PoisonValue::get(VecTy->getPointerTo()), MaybeAlign()); - return Vec; - }; - SmallVector - ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(), - E->ReuseShuffleIndices.end()); + SmallVector ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(), + E->ReuseShuffleIndices.end()); SmallVector GatheredScalars(E->Scalars.begin(), E->Scalars.end()); // Build a mask out of the reorder indices and reorder scalars per this // mask. @@ -9481,53 +9450,40 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { std::optional GatherShuffle; SmallVector Entries; Type *ScalarTy = GatheredScalars.front()->getType(); - if (!all_of(GatheredScalars, UndefValue::classof)) { - // Check for gathered extracts. - ExtractShuffle = tryToGatherExtractElements(GatheredScalars, ExtractMask); - SmallVector IgnoredVals; - if (UserIgnoreList) - IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end()); - bool Resized = false; - if (Value *VecBase = AdjustExtracts(E, ExtractMask)) - if (auto *VecBaseTy = dyn_cast(VecBase->getType())) - if (VF == VecBaseTy->getNumElements() && - GatheredScalars.size() != VF) { - Resized = true; - GatheredScalars.append(VF - GatheredScalars.size(), - PoisonValue::get(ScalarTy)); - } - // Gather extracts after we check for full matched gathers only. - if (ExtractShuffle || E->getOpcode() != Instruction::Load || - E->isAltShuffle() || - all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || - isSplat(E->Scalars) || - (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) { - GatherShuffle = - isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); - } - if (GatherShuffle) { - if (Value *Delayed = NeedToDelay(E, Entries)) { - E->VectorizedValue = Delayed; - // Delay emission of gathers which are not ready yet. - PostponedGathers.insert(E); - // Postpone gather emission, will be emitted after the end of the - // process to keep correct order. - return Delayed; - } - assert((Entries.size() == 1 || Entries.size() == 2) && - "Expected shuffle of 1 or 2 entries."); - if (!Resized) { - unsigned VF1 = Entries.front()->getVectorFactor(); - unsigned VF2 = Entries.back()->getVectorFactor(); - if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) - GatheredScalars.append(VF - GatheredScalars.size(), - PoisonValue::get(ScalarTy)); - } - // Remove shuffled elements from list of gathers. - for (int I = 0, Sz = Mask.size(); I < Sz; ++I) { - if (Mask[I] != UndefMaskElem) - GatheredScalars[I] = PoisonValue::get(ScalarTy); + // Check for gathered extracts. + ExtractShuffle = tryToGatherExtractElements(GatheredScalars, ExtractMask); + SmallVector IgnoredVals; + if (UserIgnoreList) + IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end()); + bool Resized = false; + if (Value *VecBase = AdjustExtracts(E, ExtractMask)) + if (auto *VecBaseTy = dyn_cast(VecBase->getType())) + if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) { + Resized = true; + GatheredScalars.append(VF - GatheredScalars.size(), + PoisonValue::get(ScalarTy)); } + // Gather extracts after we check for full matched gathers only. + if (ExtractShuffle || E->getOpcode() != Instruction::Load || + E->isAltShuffle() || + all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || + isSplat(E->Scalars) || + (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) + GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); + if (GatherShuffle) { + assert((Entries.size() == 1 || Entries.size() == 2) && + "Expected shuffle of 1 or 2 entries."); + if (!Resized) { + unsigned VF1 = Entries.front()->getVectorFactor(); + unsigned VF2 = Entries.back()->getVectorFactor(); + if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) + GatheredScalars.append(VF - GatheredScalars.size(), + PoisonValue::get(ScalarTy)); + } + // Remove shuffled elements from list of gathers. + for (int I = 0, Sz = Mask.size(); I < Sz; ++I) { + if (Mask[I] != UndefMaskElem) + GatheredScalars[I] = PoisonValue::get(ScalarTy); } } if ((ExtractShuffle || GatherShuffle) && @@ -9559,9 +9515,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Vec1 = CreateShuffle(Vec1, Vec2, ExtractMask); else if (Vec1) Vec1 = CreateShuffle(Vec1, Vec1, ExtractMask); - else - Vec1 = PoisonValue::get( - FixedVectorType::get(ScalarTy, GatheredScalars.size())); } if (GatherShuffle) { Vec = CreateShuffle(Entries.front()->VectorizedValue, @@ -9579,10 +9532,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } else { Vec = Vec1; } - Vec = FinalShuffle(Vec, E); } else { - Vec = createBuildVector(E); + Vec = gather(E->Scalars); } + Vec = FinalShuffle(Vec, E); E->VectorizedValue = Vec; return Vec; } @@ -10224,27 +10177,6 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues, Builder.SetInsertPoint(ReductionRoot ? ReductionRoot : &F->getEntryBlock().front()); auto *VectorRoot = vectorizeTree(VectorizableTree[0].get()); - // Run through the list of postponed gathers and emit them, replacing the temp - // emitted allocas with actual vector instructions. - ArrayRef PostponedNodes = PostponedGathers.getArrayRef(); - for (const TreeEntry *E : PostponedNodes) { - auto *TE = const_cast(E); - if (auto *VecTE = getTreeEntry(TE->Scalars.front())) - if (VecTE->isSame(TE->UserTreeIndices.front().UserTE->getOperand( - TE->UserTreeIndices.front().EdgeIdx))) - // Found gather node which is absolutely the same as one of the - // vectorized nodes. It may happen after reordering. - continue; - auto *PrevVec = cast(TE->VectorizedValue); - TE->VectorizedValue = nullptr; - auto *UserI = - cast(TE->UserTreeIndices.front().UserTE->VectorizedValue); - Builder.SetInsertPoint(PrevVec); - Builder.SetCurrentDebugLocation(UserI->getDebugLoc()); - Value *Vec = vectorizeTree(TE); - PrevVec->replaceAllUsesWith(Vec); - eraseInstruction(PrevVec); - } // If the vectorized tree can be rewritten in a smaller type, we truncate the // vectorized root. InstCombine will then rewrite the entire expression. We diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll index ee24944e5cec6..5e8616673bbcd 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll @@ -24,10 +24,13 @@ define <4 x float> @int_sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -217,10 +220,13 @@ define <4 x float> @exp_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -297,10 +303,13 @@ define <4 x float> @log_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -470,10 +479,13 @@ define <4 x float> @sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -509,10 +521,13 @@ define <4 x float> @cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -997,10 +1012,13 @@ define <4 x float> @int_cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll index 2bac49a58cd47..4df52503e1e2c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll @@ -24,10 +24,13 @@ define <4 x float> @int_sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -217,10 +220,13 @@ define <4 x float> @exp_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -297,10 +303,13 @@ define <4 x float> @log_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -470,10 +479,13 @@ define <4 x float> @sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -509,10 +521,13 @@ define <4 x float> @cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -997,10 +1012,13 @@ define <4 x float> @int_cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 +; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]]) +; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll index 2af7efb3d81fe..1cdac15dba943 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -13,13 +13,16 @@ define void @noop_extracts_first_2_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <2 x double>, ptr [[PTR_1:%.*]], align 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[V_1]], i32 0 -; CHECK-NEXT: call void @use(double [[TMP2]]) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 1 +; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 +; CHECK-NEXT: [[V2_LANE_3:%.*]] = extractelement <4 x double> [[V_2]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V2_LANE_3]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[V_1]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 0 ; CHECK-NEXT: call void @use(double [[TMP3]]) -; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 1 +; CHECK-NEXT: call void @use(double [[TMP4]]) +; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -53,12 +56,15 @@ define void @extracts_first_2_lanes_different_vectors(ptr %ptr.1, ptr %ptr.2, pt ; CHECK-NEXT: [[V_3:%.*]] = load <2 x double>, ptr [[PTR_3:%.*]], align 8 ; CHECK-NEXT: [[V3_LANE_1:%.*]] = extractelement <2 x double> [[V_3]], i32 1 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[V_1]], <2 x double> [[V_3]], <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V3_LANE_1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V3_LANE_1]]) -; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -92,13 +98,16 @@ define void @noop_extract_second_2_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <4 x double> [[V_1]], i32 2 ; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <4 x double> [[V_1]], i32 3 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -128,8 +137,10 @@ define void @extract_reverse_order(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <2 x double>, ptr [[PTR_1:%.*]], align 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[TMP0]] +; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 0 ; CHECK-NEXT: call void @use(double [[TMP3]]) @@ -167,13 +178,16 @@ define void @extract_lanes_1_and_2(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_1:%.*]] = extractelement <4 x double> [[V_1]], i32 1 ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <4 x double> [[V_1]], i32 2 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_2]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) -; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -209,17 +223,23 @@ define void @noop_extracts_existing_vector_4_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 ; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 +; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 ; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> poison, double [[V1_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[V1_LANE_0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_1]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_0]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <9 x double> [[TMP4]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <9 x double> [[TMP7]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -259,15 +279,24 @@ define void @extracts_jumbled_4_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 ; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 +; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 +; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> poison, double [[V1_LANE_0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[V1_LANE_2]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[V1_LANE_1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_3]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_0]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <9 x double> [[TMP3]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <9 x double> [[TMP8]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -304,22 +333,51 @@ define void @noop_extracts_9_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-LABEL: @noop_extracts_9_lanes( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8 +; CHECK-NEXT: [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0 +; CHECK-NEXT: [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1 ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 +; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 +; CHECK-NEXT: [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4 ; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5 +; CHECK-NEXT: [[V1_LANE_6:%.*]] = extractelement <9 x double> [[V_1]], i32 6 +; CHECK-NEXT: [[V1_LANE_7:%.*]] = extractelement <9 x double> [[V_1]], i32 7 +; CHECK-NEXT: [[V1_LANE_8:%.*]] = extractelement <9 x double> [[V_1]], i32 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 +; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_3]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x double> [[TMP0]], double [[V1_LANE_4]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x double> [[TMP1]], double [[V1_LANE_5]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x double> [[TMP2]], double [[V1_LANE_6]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x double> [[TMP3]], double [[V1_LANE_7]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x double> [[TMP4]], double [[V1_LANE_8]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[V1_LANE_0]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[V1_LANE_1]], i32 7 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -392,22 +450,47 @@ define void @first_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-LABEL: @first_mul_chain_jumbled( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8 +; CHECK-NEXT: [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0 +; CHECK-NEXT: [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1 ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 +; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 +; CHECK-NEXT: [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4 ; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5 +; CHECK-NEXT: [[V1_LANE_6:%.*]] = extractelement <9 x double> [[V_1]], i32 6 +; CHECK-NEXT: [[V1_LANE_7:%.*]] = extractelement <9 x double> [[V_1]], i32 7 +; CHECK-NEXT: [[V1_LANE_8:%.*]] = extractelement <9 x double> [[V_1]], i32 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 ; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_4]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x double> [[TMP1]], double [[V1_LANE_6]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x double> [[TMP2]], double [[V1_LANE_5]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x double> [[TMP3]], double [[V1_LANE_8]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x double> [[TMP4]], double [[V1_LANE_7]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[V1_LANE_1]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[V1_LANE_0]], i32 7 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_1]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_0]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_2]], i32 2 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fmul <8 x double> [[TMP4]], [[TMP1]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 +; CHECK-NEXT: [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE1]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP6]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP22]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -480,23 +563,51 @@ define void @first_and_second_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-LABEL: @first_and_second_mul_chain_jumbled( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8 +; CHECK-NEXT: [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0 +; CHECK-NEXT: [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1 ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 +; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 ; CHECK-NEXT: [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4 +; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5 +; CHECK-NEXT: [[V1_LANE_6:%.*]] = extractelement <9 x double> [[V_1]], i32 6 +; CHECK-NEXT: [[V1_LANE_7:%.*]] = extractelement <9 x double> [[V_1]], i32 7 +; CHECK-NEXT: [[V1_LANE_8:%.*]] = extractelement <9 x double> [[V_1]], i32 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 +; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_4]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x double> [[TMP1]], double [[V1_LANE_5]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x double> [[TMP2]], double [[V1_LANE_6]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x double> [[TMP3]], double [[V1_LANE_8]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x double> [[TMP4]], double [[V1_LANE_7]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[V1_LANE_1]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[V1_LANE_0]], i32 7 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_6]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_1]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_0]], i32 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_3]], i32 5 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_2]], i32 6 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_5]], i32 7 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll index e6d80675635a2..6fd4b14eb799e 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll @@ -244,8 +244,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) { ; GFX8-NEXT: bb: ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2 ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2 -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) ; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> @@ -291,11 +291,11 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) { ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> ; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) ; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> ; GFX8-NEXT: ret <4 x i16> [[INS_31]] diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll index e99be89ef9bba..24fee6387beb5 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll @@ -244,8 +244,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) { ; GFX8-NEXT: bb: ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2 ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2 -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) ; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> @@ -291,11 +291,11 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) { ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> ; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) ; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> ; GFX8-NEXT: ret <4 x i16> [[INS_31]] diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll index 68295bb0d78e3..7471d0f7d329c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll @@ -4,10 +4,16 @@ define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1) { ; CHECK-LABEL: @uadd_sat_v9i16_combine_vi16( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0:%.*]], <9 x i16> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <9 x i16> [[ARG1:%.*]], <9 x i16> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; CHECK-NEXT: ret <2 x i16> [[TMP2]] +; CHECK-NEXT: [[ARG0_1:%.*]] = extractelement <9 x i16> undef, i64 7 +; CHECK-NEXT: [[ARG0_2:%.*]] = extractelement <9 x i16> [[ARG0:%.*]], i64 8 +; CHECK-NEXT: [[ARG1_1:%.*]] = extractelement <9 x i16> [[ARG1:%.*]], i64 7 +; CHECK-NEXT: [[ARG1_2:%.*]] = extractelement <9 x i16> [[ARG1]], i64 8 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> poison, i16 [[ARG0_1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[TMP0]], i16 [[ARG0_2]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[ARG1_1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> [[TMP2]], i16 [[ARG1_2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP1]], <2 x i16> [[TMP3]]) +; CHECK-NEXT: ret <2 x i16> [[TMP4]] ; bb: %arg0.1 = extractelement <9 x i16> undef, i64 7 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll index eb4117bbf7209..f870fb3a9bc1f 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -1,24 +1,34 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=slp-vectorizer -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck %s define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-LABEL: @phis( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1 +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: br label [[BB1]] +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1 +; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[O31:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> -; CHECK-NEXT: ret <4 x half> [[O31]] -; +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> +; CHECK-NEXT: ret <4 x half> [[TMP12]] entry: %a0 = extractelement <4 x half> %in1, i64 0 %a1 = extractelement <4 x half> %in1, i64 1 @@ -49,21 +59,32 @@ bb1: define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-LABEL: @phis_reverse( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1 +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: br label [[BB1]] +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1 +; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[O31:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> -; CHECK-NEXT: ret <4 x half> [[O31]] -; +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> +; CHECK-NEXT: ret <4 x half> [[TMP12]] entry: %a0 = extractelement <4 x half> %in1, i64 0 %a1 = extractelement <4 x half> %in1, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll index cc1fa07d4c4f3..7080b693e0ff6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll @@ -4,11 +4,14 @@ define void @test(<16 x half> %v) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[V:%.*]], <16 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x half> [[TMP0]] to <2 x float> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> [[V:%.*]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> [[V]], i32 5 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll index a9e3aa33ae92d..244faa4b3e78e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -4,11 +4,14 @@ define void @test(<16 x half> %v) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[V:%.*]], <16 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x half> [[TMP0]] to <2 x float> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> [[V:%.*]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> [[V]], i32 5 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll index ae8a52e669d35..a1babad830b88 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -7,16 +7,18 @@ define void @Test(i32) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP0:%.*]], [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> , i32 [[OP_RDX]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = and <2 x i32> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP9]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP4]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP0:%.*]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> , i32 [[OP_RDX]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> ; CHECK-NEXT: br label [[LOOP]] ; ; FORCE_REDUCTION-LABEL: @Test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll index ee217c07895ee..c801cd296917f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll @@ -11,12 +11,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; SSE-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SSE-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SSE-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -32,12 +32,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; SLM-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SLM-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SLM-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -53,12 +53,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll index 0f7d3b5647ff9..b4d525fc4db5a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll @@ -11,12 +11,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; SSE-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SSE-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SSE-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -32,12 +32,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; SLM-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SLM-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SLM-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -53,12 +53,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll index 11eddced91a12..40678b9c87829 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll @@ -163,7 +163,7 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: @sitofp_4i32_8i16( ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> ; CHECK-NEXT: [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[R71]] @@ -201,11 +201,11 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll index 59a77598b6640..b09c142cca50c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -163,7 +163,7 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: @sitofp_4i32_8i16( ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> ; CHECK-NEXT: [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[R71]] @@ -201,11 +201,11 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll index 0f59f595f911e..2dcaec8d11023 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll @@ -98,7 +98,7 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) { ; SLM-LABEL: @fmul_fdiv_v4f32_const( ; SLM-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i64 2 ; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], ; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00 ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll index ef13f7956e16d..99e673ee52f54 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll @@ -98,7 +98,7 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) { ; SLM-LABEL: @fmul_fdiv_v4f32_const( ; SLM-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i64 2 ; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], ; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00 ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll index 9cf0e1184c4fe..19e7ed8d52117 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -170,17 +170,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> ; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> ; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[R71]] @@ -236,8 +236,8 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i64 7 ; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i64 6 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> @@ -253,13 +253,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: ret <8 x i32> [[R7]] ; ; SLM-LABEL: @ashr_lshr_shl_v8i32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; SLM-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; SLM-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -267,13 +267,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: ret <8 x i32> [[R71]] ; ; AVX1-LABEL: @ashr_lshr_shl_v8i32( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; AVX1-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; AVX1-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -281,13 +281,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; AVX1-NEXT: ret <8 x i32> [[R71]] ; ; AVX2-LABEL: @ashr_lshr_shl_v8i32( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -295,13 +295,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; AVX2-NEXT: ret <8 x i32> [[R71]] ; ; AVX512-LABEL: @ashr_lshr_shl_v8i32( -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; AVX512-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; AVX512-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -443,10 +443,10 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX2-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 1 ; AVX2-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 ; AVX2-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> ; AVX2-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX2-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i64 1 ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -460,10 +460,10 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX512-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 1 ; AVX512-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 ; AVX512-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> ; AVX512-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX512-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> +; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> ; AVX512-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i64 1 ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -503,11 +503,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; CHECK-LABEL: @add_sub_v8i32_splat( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP5]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[SHUFFLE]], [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[SHUFFLE]], [[A]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP4]] ; %a0 = extractelement <8 x i32> %a, i32 0 %a1 = extractelement <8 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 976be7d7f1c7e..c07c4b794f150 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -170,17 +170,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> ; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> ; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[R71]] @@ -236,8 +236,8 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i64 7 ; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i64 6 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> @@ -253,13 +253,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: ret <8 x i32> [[R7]] ; ; SLM-LABEL: @ashr_lshr_shl_v8i32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; SLM-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; SLM-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -267,13 +267,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: ret <8 x i32> [[R71]] ; ; AVX1-LABEL: @ashr_lshr_shl_v8i32( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; AVX1-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; AVX1-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -281,13 +281,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; AVX1-NEXT: ret <8 x i32> [[R71]] ; ; AVX2-LABEL: @ashr_lshr_shl_v8i32( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -295,13 +295,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; AVX2-NEXT: ret <8 x i32> [[R71]] ; ; AVX512-LABEL: @ashr_lshr_shl_v8i32( -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; AVX512-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> ; AVX512-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -443,10 +443,10 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX2-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 1 ; AVX2-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 ; AVX2-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> ; AVX2-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX2-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> , i32 [[AB1]], i64 1 ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -460,10 +460,10 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX512-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 1 ; AVX512-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 ; AVX512-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> ; AVX512-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX512-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> +; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> ; AVX512-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> , i32 [[AB1]], i64 1 ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -503,11 +503,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; CHECK-LABEL: @add_sub_v8i32_splat( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP5]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[SHUFFLE]], [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[SHUFFLE]], [[A]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP4]] ; %a0 = extractelement <8 x i32> %a, i32 0 %a1 = extractelement <8 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll index c556ea7491d48..a1e47e5b31aa0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll @@ -607,25 +607,49 @@ define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) { ; SSE-NEXT: ret <8 x double> [[TMP1]] ; ; SLM-LABEL: @buildvector_div_8f64( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fdiv <2 x double> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP9:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP8]] -; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP12:%.*]] = fdiv <2 x double> [[TMP10]], [[TMP11]] -; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x i32> -; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP15]], <8 x i32> -; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP16]], <8 x i32> +; SLM-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 +; SLM-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 +; SLM-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 +; SLM-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 +; SLM-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 +; SLM-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 +; SLM-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 +; SLM-NEXT: [[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1 +; SLM-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2 +; SLM-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3 +; SLM-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4 +; SLM-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 +; SLM-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 +; SLM-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 +; SLM-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 +; SLM-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1 +; SLM-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 +; SLM-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1 +; SLM-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]] +; SLM-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0 +; SLM-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1 +; SLM-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[B2]], i32 0 +; SLM-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1 +; SLM-NEXT: [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]] +; SLM-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[A4]], i32 0 +; SLM-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[A5]], i32 1 +; SLM-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[B4]], i32 0 +; SLM-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[B5]], i32 1 +; SLM-NEXT: [[TMP15:%.*]] = fdiv <2 x double> [[TMP12]], [[TMP14]] +; SLM-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[A6]], i32 0 +; SLM-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[A7]], i32 1 +; SLM-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[B6]], i32 0 +; SLM-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[B7]], i32 1 +; SLM-NEXT: [[TMP20:%.*]] = fdiv <2 x double> [[TMP17]], [[TMP19]] +; SLM-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <8 x i32> +; SLM-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP23]], <8 x i32> +; SLM-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP24]], <8 x i32> ; SLM-NEXT: ret <8 x double> [[R73]] ; ; AVX-LABEL: @buildvector_div_8f64( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll index f331735558b0a..8022b09f05c0b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll @@ -607,25 +607,49 @@ define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) { ; SSE-NEXT: ret <8 x double> [[TMP1]] ; ; SLM-LABEL: @buildvector_div_8f64( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fdiv <2 x double> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP9:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP8]] -; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> -; SLM-NEXT: [[TMP12:%.*]] = fdiv <2 x double> [[TMP10]], [[TMP11]] -; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x i32> -; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP15]], <8 x i32> -; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP16]], <8 x i32> +; SLM-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 +; SLM-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 +; SLM-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 +; SLM-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 +; SLM-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 +; SLM-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 +; SLM-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 +; SLM-NEXT: [[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1 +; SLM-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2 +; SLM-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3 +; SLM-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4 +; SLM-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 +; SLM-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 +; SLM-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 +; SLM-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 +; SLM-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1 +; SLM-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 +; SLM-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1 +; SLM-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]] +; SLM-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0 +; SLM-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1 +; SLM-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[B2]], i32 0 +; SLM-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1 +; SLM-NEXT: [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]] +; SLM-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[A4]], i32 0 +; SLM-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[A5]], i32 1 +; SLM-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[B4]], i32 0 +; SLM-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[B5]], i32 1 +; SLM-NEXT: [[TMP15:%.*]] = fdiv <2 x double> [[TMP12]], [[TMP14]] +; SLM-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[A6]], i32 0 +; SLM-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[A7]], i32 1 +; SLM-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[B6]], i32 0 +; SLM-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[B7]], i32 1 +; SLM-NEXT: [[TMP20:%.*]] = fdiv <2 x double> [[TMP17]], [[TMP19]] +; SLM-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <8 x i32> +; SLM-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP23]], <8 x i32> +; SLM-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP24]], <8 x i32> ; SLM-NEXT: ret <8 x double> [[R73]] ; ; AVX-LABEL: @buildvector_div_8f64( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll index 18d9f8c903c5b..65817d74ee54f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -5,19 +5,19 @@ define void @exceed(double %0, double %1) { ; CHECK-LABEL: @exceed( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP5]], undef ; CHECK-NEXT: [[IXX0:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX1:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX2:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX3:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX4:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX5:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef +; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP5]], undef ; CHECK-NEXT: [[IXX10:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX11:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX12:%.*]] = fsub double undef, undef @@ -27,15 +27,16 @@ define void @exceed(double %0, double %1) { ; CHECK-NEXT: [[IXX20:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX21:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 -; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 +; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP6]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP9]], [[TMP7]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP5]], <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <2 x double> [[TMP13]], undef +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[TMP12]], undef ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] ; CHECK-NEXT: ] @@ -44,7 +45,7 @@ define void @exceed(double %0, double %1) { ; CHECK: bb2: ; CHECK-NEXT: br label [[LABEL]] ; CHECK: label: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP14]], [[BB2]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x double> [ [[TMP10]], [[BB1]] ], [ [[TMP13]], [[BB2]] ] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index f2acfddb659c6..5b7935ba02c32 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -1,10 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=slp-vectorizer,dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s -%struct.Ray = type { %struct.Vec, %struct.Vec } -%struct.Vec = type { double, double, double } +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" -define void @main() { +%struct.Ray.5.11.53.113.119.137.149.185.329.389.416 = type { %struct.Vec.0.6.48.108.114.132.144.180.324.384.414, %struct.Vec.0.6.48.108.114.132.144.180.324.384.414 } +%struct.Vec.0.6.48.108.114.132.144.180.324.384.414 = type { double, double, double } + +; Function Attrs: ssp uwtable +define void @main() #0 { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[COND_TRUE:%.*]], label [[COND_END:%.*]] @@ -15,7 +19,7 @@ define void @main() { ; CHECK: invoke.cont: ; CHECK-NEXT: br i1 undef, label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]] ; CHECK: arrayctor.cont: -; CHECK-NEXT: [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0 +; CHECK-NEXT: [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_113_119_137_149_185_329_389_416:%.*]], ptr undef, i64 0, i32 1, i32 0 ; CHECK-NEXT: br label [[FOR_COND36_PREHEADER:%.*]] ; CHECK: for.cond36.preheader: ; CHECK-NEXT: br i1 undef, label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]] @@ -24,14 +28,18 @@ define void @main() { ; CHECK: cond.true48.us: ; CHECK-NEXT: br i1 undef, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]] ; CHECK: cond.false66.us: -; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double [[ADD_I276_US]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> , [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> , [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: store <2 x double> [[TMP3]], ptr undef, align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP1]] -; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> , double [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> , double [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[TMP7]] +; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8 ; CHECK-NEXT: unreachable ; CHECK: cond.true63.us: ; CHECK-NEXT: unreachable @@ -43,104 +51,114 @@ define void @main() { entry: br i1 undef, label %cond.true, label %cond.end -cond.true: +cond.true: ; preds = %entry unreachable -cond.end: +cond.end: ; preds = %entry br label %invoke.cont -invoke.cont: +invoke.cont: ; preds = %invoke.cont, %cond.end br i1 undef, label %arrayctor.cont, label %invoke.cont -arrayctor.cont: - %agg.tmp99208.sroa.1.8.idx388 = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 0, i32 1 - %agg.tmp101211.sroa.0.0.idx = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 1, i32 0 - %agg.tmp101211.sroa.1.8.idx390 = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 1, i32 1 +arrayctor.cont: ; preds = %invoke.cont + %agg.tmp99208.sroa.1.8.idx388 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416, ptr undef, i64 0, i32 0, i32 1 + %agg.tmp101211.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416, ptr undef, i64 0, i32 1, i32 0 + %agg.tmp101211.sroa.1.8.idx390 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416, ptr undef, i64 0, i32 1, i32 1 br label %for.cond36.preheader -for.cond36.preheader: +for.cond36.preheader: ; preds = %_Z5clampd.exit.1, %arrayctor.cont br i1 undef, label %for.body42.lr.ph.us, label %_Z5clampd.exit.1 -cond.false51.us: +cond.false51.us: ; preds = %for.body42.lr.ph.us unreachable -cond.true48.us: +cond.true48.us: ; preds = %for.body42.lr.ph.us br i1 undef, label %cond.true63.us, label %cond.false66.us -cond.false66.us: - %add.i276.us = fadd double 0.000000e+00, 0.000001e+00 +cond.false66.us: ; preds = %cond.true48.us + %add.i276.us = fadd double 0.000000e+00, undef %add.i264.us = fadd double %add.i276.us, 0.000000e+00 - %add4.i267.us = fadd double 0.1e+00, 0xBFA5CC2D1960285F + %add4.i267.us = fadd double undef, 0xBFA5CC2D1960285F %mul.i254.us = fmul double %add.i264.us, 1.400000e+02 %mul2.i256.us = fmul double %add4.i267.us, 1.400000e+02 %add.i243.us = fadd double %mul.i254.us, 5.000000e+01 %add4.i246.us = fadd double %mul2.i256.us, 5.200000e+01 - %mul.i.i.us = fmul double 0.2e+00, %add.i264.us - %mul2.i.i.us = fmul double 0.3e+00, %add4.i267.us + %mul.i.i.us = fmul double undef, %add.i264.us + %mul2.i.i.us = fmul double undef, %add4.i267.us store double %add.i243.us, ptr undef, align 8 store double %add4.i246.us, ptr %agg.tmp99208.sroa.1.8.idx388, align 8 store double %mul.i.i.us, ptr %agg.tmp101211.sroa.0.0.idx, align 8 store double %mul2.i.i.us, ptr %agg.tmp101211.sroa.1.8.idx390, align 8 unreachable -cond.true63.us: +cond.true63.us: ; preds = %cond.true48.us unreachable -for.body42.lr.ph.us: +for.body42.lr.ph.us: ; preds = %for.cond36.preheader br i1 undef, label %cond.true48.us, label %cond.false51.us -_Z5clampd.exit.1: +_Z5clampd.exit.1: ; preds = %for.cond36.preheader br label %for.cond36.preheader } -define void @test() { -; CHECK-LABEL: @test( + +%struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601 = type { %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600, %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 } +%struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 = type { double, double, double } + +define void @_Z8radianceRK3RayiPt() #0 { +; CHECK-LABEL: @_Z8radianceRK3RayiPt( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: -; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double 6.000000e-02, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> , [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> , [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> , [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> , [[TMP6]] +; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], ptr undef, i64 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double undef, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]] ; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[AGG_TMP74663_SROA_0_0_IDX]], align 8 -; CHECK-NEXT: br label [[IF_THEN78]] +; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: if.then78: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: ; CHECK-NEXT: ret void ; entry: br i1 undef, label %if.then78, label %if.then38 -if.then38: - %mul.i.i790 = fmul double 0.0, 0.1 - %mul3.i.i792 = fmul double 0.2, 0.3 - %mul.i764 = fmul double 0.4, %mul3.i.i792 - %mul4.i767 = fmul double 0.5, 0.6 +if.then38: ; preds = %entry + %mul.i.i790 = fmul double undef, undef + %mul3.i.i792 = fmul double undef, undef + %mul.i764 = fmul double undef, %mul3.i.i792 + %mul4.i767 = fmul double undef, undef %sub.i768 = fsub double %mul.i764, %mul4.i767 - %mul6.i770 = fmul double 0.7, %mul.i.i790 - %mul9.i772 = fmul double 0.8, %mul3.i.i792 + %mul6.i770 = fmul double undef, %mul.i.i790 + %mul9.i772 = fmul double undef, %mul3.i.i792 %sub10.i773 = fsub double %mul6.i770, %mul9.i772 - %mul.i736 = fmul double 0.9, %sub.i768 - %mul2.i738 = fmul double 0.91, %sub10.i773 - %mul.i727 = fmul double 0.92, %mul.i736 - %mul2.i729 = fmul double 0.93, %mul2.i738 - %add.i716 = fadd double 0.94, %mul.i727 - %add4.i719 = fadd double 0.95, %mul2.i729 - %add.i695 = fadd double 0.96, %add.i716 - %add4.i698 = fadd double 0.97, %add4.i719 - %mul.i.i679 = fmul double 0.98, %add.i695 - %mul2.i.i680 = fmul double 0.99, %add4.i698 - %agg.tmp74663.sroa.0.0.idx = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 1, i32 0 + %mul.i736 = fmul double undef, %sub.i768 + %mul2.i738 = fmul double undef, %sub10.i773 + %mul.i727 = fmul double undef, %mul.i736 + %mul2.i729 = fmul double undef, %mul2.i738 + %add.i716 = fadd double undef, %mul.i727 + %add4.i719 = fadd double undef, %mul2.i729 + %add.i695 = fadd double undef, %add.i716 + %add4.i698 = fadd double undef, %add4.i719 + %mul.i.i679 = fmul double undef, %add.i695 + %mul2.i.i680 = fmul double undef, %add4.i698 + %agg.tmp74663.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601, ptr undef, i64 0, i32 1, i32 0 store double %mul.i.i679, ptr %agg.tmp74663.sroa.0.0.idx, align 8 - %agg.tmp74663.sroa.1.8.idx943 = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 1, i32 1 + %agg.tmp74663.sroa.1.8.idx943 = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601, ptr undef, i64 0, i32 1, i32 1 store double %mul2.i.i680, ptr %agg.tmp74663.sroa.1.8.idx943, align 8 - br label %if.then78 + br label %return + +if.then78: ; preds = %entry + br label %return -if.then78: +return: ; preds = %if.then78, %if.then38 ret void } +attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll index 74b9f31973d12..04d190bccfea4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll @@ -3,9 +3,12 @@ define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @g( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]] -; CHECK-NEXT: ret <2 x i8> [[TMP2]] +; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0 +; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[X0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[Y1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP2]], [[TMP2]] +; CHECK-NEXT: ret <2 x i8> [[TMP3]] ; %x0 = extractelement <2 x i8> %x, i32 0 %y1 = extractelement <2 x i8> %y, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll index 9276efb6e9cdc..b6535f9f2e696 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll @@ -3,9 +3,12 @@ define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @g( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]] -; CHECK-NEXT: ret <2 x i8> [[TMP2]] +; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0 +; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[X0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[Y1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP2]], [[TMP2]] +; CHECK-NEXT: ret <2 x i8> [[TMP3]] ; %x0 = extractelement <2 x i8> %x, i32 0 %y1 = extractelement <2 x i8> %y, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll index 8fde48dd3bb5a..7de9f4a3aee7c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll @@ -28,8 +28,8 @@ define void @fextr1(ptr %ptr) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, ptr undef, align 16 ; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[LD]], -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: store <2 x double> [[SHUFFLE]], ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -48,9 +48,12 @@ define void @fextr2(ptr %ptr) { ; CHECK-LABEL: @fextr2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <4 x double>, ptr undef, align 32 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[LD]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[TMP0]], -; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: [[V0:%.*]] = extractelement <4 x double> [[LD]], i32 0 +; CHECK-NEXT: [[V1:%.*]] = extractelement <4 x double> [[LD]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll index 35f521fbe8113..33959dc48f296 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll @@ -14,8 +14,10 @@ define float @multi_uses(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @multi_uses( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP1]] +; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Y1]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X:%.*]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll index f5718e04df9ad..6bff6d9b45a3d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -82,19 +82,23 @@ define float @f_used_twice_in_tree(<2 x float> %x) { ; CHECK-NEXT: ret float [[ADD]] ; ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] -; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] +; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 +; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 +; THRESH1-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; THRESH1-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]] +; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( -; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] -; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] +; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 +; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 +; THRESH2-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; THRESH2-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]] +; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] ; THRESH2-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll index e1c816fd383d7..482334e510f64 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll @@ -6,29 +6,32 @@ define i32 @foo(i32 %a) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[A:%.*]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 -; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[OP_RDX10]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1 +; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[OP_RDX12:%.*]] = add i32 [[OP_RDX11]], 0 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX11]], [[BB1]] ], [ 0, [[BB2:%.*]] ] +; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX12]], [[BB1]] ], [ 0, [[BB2:%.*]] ] ; CHECK-NEXT: ret i32 0 ; CHECK: bb4: -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP2]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) -; CHECK-NEXT: [[OP_RDX8:%.*]] = add i32 [[TMP10]], 0 -; CHECK-NEXT: [[OP_RDX9:%.*]] = add i32 [[OP_RDX8]], [[TMP3]] -; CHECK-NEXT: ret i32 [[OP_RDX9]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE8]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) +; CHECK-NEXT: [[OP_RDX9:%.*]] = add i32 [[TMP11]], 0 +; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[OP_RDX9]], [[TMP2]] +; CHECK-NEXT: ret i32 [[OP_RDX10]] ; CHECK: bb5: ; CHECK-NEXT: br label [[BB4:%.*]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll index f8fbcc58295a1..822db3a2f6ea5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll @@ -143,8 +143,8 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { ; PR41892 define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v2f32_store( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void @@ -220,8 +220,8 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 +; CHECK-NEXT: [[R021:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R021]], double [[R3]], i64 3 ; CHECK-NEXT: ret <4 x double> [[R03]] ; %a0 = extractelement <4 x double> %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll index 5507cc9404861..8fd490a9e38eb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -143,8 +143,8 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { ; PR41892 define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v2f32_store( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> undef, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void @@ -220,8 +220,8 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 +; CHECK-NEXT: [[R021:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R021]], double [[R3]], i64 3 ; CHECK-NEXT: ret <4 x double> [[R03]] ; %a0 = extractelement <4 x double> %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll index d1350bc029019..24b0e545bd687 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll @@ -3,13 +3,22 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; CHECK-LABEL: @simple_select( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: ret <4 x float> [[TMP6]] +; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP9]] ; %c0 = extractelement <4 x i32> %c, i32 0 %c1 = extractelement <4 x i32> %c, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll index 654121553b4cf..e266ce02962b1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -146,10 +146,10 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 ; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0 ; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> ; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 ; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> +; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> ; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] ; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] ; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 @@ -273,19 +273,37 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32 ; Unused insertelement define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_no_users( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 +; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[C3]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index d855dd6292c00..4be025a7324a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -180,10 +180,10 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 ; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0 ; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> ; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 ; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> +; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> ; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] ; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] ; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 @@ -307,19 +307,37 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32 ; Unused insertelement define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_no_users( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 +; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[C3]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll index 3fc6fe9e60611..6756d9aa2101e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll @@ -13,11 +13,11 @@ define <4 x double> @test(ptr %p2, double %i1754, double %i1781, double %i1778) ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[I1778:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[I1781]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[I1772]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x double> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> , double [[I1797]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x double> [[TMP5]], [[TMP6]] -; CHECK-NEXT: ret <4 x double> [[TMP7]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <4 x double> [[TMP3]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> , double [[I1797]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x double> [[TMP4]], [[TMP5]] +; CHECK-NEXT: ret <4 x double> [[TMP6]] ; entry: %i1771 = getelementptr inbounds double, ptr %p2, i64 54 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll index 9ed021448e020..402e111c98598 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -10,10 +10,10 @@ define void @foo() personality ptr @bar { ; CHECK: bb2.loopexit: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP8:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP10:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] ; CHECK-NEXT: ret void ; CHECK: bb3: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]] ; CHECK: bb4: @@ -21,30 +21,32 @@ define void @foo() personality ptr @bar { ; CHECK: bb5: ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ , [[BB8:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ , [[BB8:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP5]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 1 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb7: ; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] ; CHECK: bb8: ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP8]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[BB10]] ], [ [[TMP12:%.*]], [[BB12]] ] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: -; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] +; CHECK-NEXT: [[TMP11]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ] +; CHECK-NEXT: [[TMP12]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll index 469b15d074186..1ed2841a770db 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -23,14 +23,14 @@ define void @lookahead_basic(ptr %array) { ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, ptr [[ARRAY:%.*]], i64 2 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 4 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 6 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[IDX4]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX6]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP4]] -; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr [[IDX4]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[IDX6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP8]] +; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[ARRAY]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -85,12 +85,12 @@ define void @lookahead_alt1(ptr %array) { ; CHECK-NEXT: [[IDX5:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 5 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 6 ; CHECK-NEXT: [[IDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP2]] -; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP4]] +; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[ARRAY]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -139,18 +139,18 @@ define void @lookahead_alt2(ptr %array) { ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, ptr [[ARRAY:%.*]], i64 2 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 4 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 6 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[IDX4]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX6]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]] -; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr [[IDX4]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[IDX6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP12:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[TMP10]], [[TMP13]] +; CHECK-NEXT: store <2 x double> [[TMP14]], ptr [[ARRAY]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -207,22 +207,22 @@ define void @lookahead_external_uses(ptr %A, ptr %B, ptr %C, ptr %D, ptr %S, ptr ; CHECK-NEXT: [[B0:%.*]] = load double, ptr [[B]], align 8 ; CHECK-NEXT: [[C0:%.*]] = load double, ptr [[C:%.*]], align 8 ; CHECK-NEXT: [[D0:%.*]] = load double, ptr [[D:%.*]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[B2:%.*]] = load double, ptr [[IDXB2]], align 8 ; CHECK-NEXT: [[A2:%.*]] = load double, ptr [[IDXA2]], align 8 ; CHECK-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A2]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[B1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP8]] -; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[S:%.*]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP0]], i32 1 -; CHECK-NEXT: store double [[TMP10]], ptr [[EXT1:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]] +; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[S:%.*]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: store double [[TMP12]], ptr [[EXT1:%.*]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -285,24 +285,24 @@ define void @lookahead_limit_users_budget(ptr %A, ptr %B, ptr %C, ptr %D, ptr %S ; CHECK-NEXT: [[B0:%.*]] = load double, ptr [[B]], align 8 ; CHECK-NEXT: [[C0:%.*]] = load double, ptr [[C:%.*]], align 8 ; CHECK-NEXT: [[D0:%.*]] = load double, ptr [[D:%.*]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[B2:%.*]] = load double, ptr [[IDXB2]], align 8 ; CHECK-NEXT: [[A2:%.*]] = load double, ptr [[IDXA2]], align 8 ; CHECK-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A2]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[B1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP8]] -; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[S:%.*]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP0]], i32 1 -; CHECK-NEXT: store double [[TMP10]], ptr [[EXT1:%.*]], align 8 -; CHECK-NEXT: store double [[TMP10]], ptr [[EXT2:%.*]], align 8 -; CHECK-NEXT: store double [[TMP10]], ptr [[EXT3:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]] +; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[S:%.*]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: store double [[TMP12]], ptr [[EXT1:%.*]], align 8 +; CHECK-NEXT: store double [[TMP12]], ptr [[EXT2:%.*]], align 8 +; CHECK-NEXT: store double [[TMP12]], ptr [[EXT3:%.*]], align 8 ; CHECK-NEXT: store double [[B1]], ptr [[EXT4:%.*]], align 8 ; CHECK-NEXT: store double [[B1]], ptr [[EXT5:%.*]], align 8 ; CHECK-NEXT: ret void @@ -358,13 +358,13 @@ declare double @_ZN1i2axEv() define void @lookahead_crash(ptr %A, ptr %S, ptr %Arg0) { ; CHECK-LABEL: @lookahead_crash( -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8 ; CHECK-NEXT: [[C0:%.*]] = call double @_ZN1i2ayEv(ptr [[ARG0:%.*]]) ; CHECK-NEXT: [[C1:%.*]] = call double @_ZN1i2axEv() -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[S:%.*]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[C1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[S:%.*]], align 8 ; CHECK-NEXT: ret void ; %IdxA1 = getelementptr inbounds double, ptr %A, i64 1 @@ -393,13 +393,13 @@ define void @ChecksExtractScores(ptr %storeArray, ptr %array, ptr %vecPtr1, ptr ; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x double>, ptr [[VECPTR1:%.*]], align 4 ; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, ptr [[VECPTR2:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[LOADVEC]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[LOADVEC2]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] -; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[STOREARRAY:%.*]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[LOADVEC]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[LOADVEC2]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[STOREARRAY:%.*]], align 8 ; CHECK-NEXT: ret void ; %idx1 = getelementptr inbounds double, ptr %array, i64 1 @@ -576,16 +576,22 @@ define void @ChecksExtractScores_different_vectors(ptr %storeArray, ptr %array, ; SSE-LABEL: @ChecksExtractScores_different_vectors( ; SSE-NEXT: [[LOADVEC:%.*]] = load <2 x double>, ptr [[VECPTR1:%.*]], align 4 ; SSE-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, ptr [[VECPTR2:%.*]], align 4 +; SSE-NEXT: [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0 +; SSE-NEXT: [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1 ; SSE-NEXT: [[LOADVEC3:%.*]] = load <2 x double>, ptr [[VECPTR3:%.*]], align 4 ; SSE-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, ptr [[VECPTR4:%.*]], align 4 -; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY:%.*]], align 4 -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[LOADVEC2]], <2 x double> [[LOADVEC3]], <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], [[TMP1]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[LOADVEC]], <2 x double> [[LOADVEC4]], <2 x i32> -; SSE-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], [[TMP1]] -; SSE-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP4]], [[TMP6]] -; SSE-NEXT: store <2 x double> [[TMP7]], ptr [[STOREARRAY:%.*]], align 8 +; SSE-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0 +; SSE-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1 +; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAY:%.*]], align 4 +; SSE-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0 +; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRB0]], i32 1 +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]] +; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 +; SSE-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1 +; SSE-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP2]] +; SSE-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP8]] +; SSE-NEXT: store <2 x double> [[TMP9]], ptr [[STOREARRAY:%.*]], align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @ChecksExtractScores_different_vectors( @@ -594,16 +600,22 @@ define void @ChecksExtractScores_different_vectors(ptr %storeArray, ptr %array, ; AVX-NEXT: [[LOADA1:%.*]] = load double, ptr [[IDX1]], align 4 ; AVX-NEXT: [[LOADVEC:%.*]] = load <2 x double>, ptr [[VECPTR1:%.*]], align 4 ; AVX-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, ptr [[VECPTR2:%.*]], align 4 +; AVX-NEXT: [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0 +; AVX-NEXT: [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1 ; AVX-NEXT: [[LOADVEC3:%.*]] = load <2 x double>, ptr [[VECPTR3:%.*]], align 4 ; AVX-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, ptr [[VECPTR4:%.*]], align 4 -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[LOADVEC]], <2 x double> [[LOADVEC2]], <2 x i32> -; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[LOADVEC3]], <2 x double> [[LOADVEC4]], <2 x i32> -; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 -; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]] +; AVX-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0 +; AVX-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1 +; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 +; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[EXTRA1]], i32 1 +; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE]] +; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[EXTRB1]], i32 1 +; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE1]] ; AVX-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]] ; AVX-NEXT: store <2 x double> [[TMP9]], ptr [[STOREARRAY:%.*]], align 8 ; AVX-NEXT: ret void @@ -639,13 +651,12 @@ define void @ChecksExtractScores_different_vectors(ptr %storeArray, ptr %array, define double @splat_loads(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) { ; SSE-LABEL: @splat_loads( ; SSE-NEXT: entry: -; SSE-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 -; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8 -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP0]], [[TMP4]] -; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] +; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 +; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8 +; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] ; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 ; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 ; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] @@ -656,17 +667,17 @@ define double @splat_loads(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) { ; AVX-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds double, ptr [[ARRAY2:%.*]], i64 1 ; AVX-NEXT: [[LD_2_0:%.*]] = load double, ptr [[ARRAY2]], align 8 ; AVX-NEXT: [[LD_2_1:%.*]] = load double, ptr [[GEP_2_1]], align 8 -; AVX-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 -; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]] +; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 +; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] ; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP0]], [[TMP5]] -; AVX-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] -; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 -; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP8]], [[TMP9]] +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] +; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] +; AVX-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] ; AVX-NEXT: ret double [[ADD3]] ; entry: @@ -696,18 +707,17 @@ entry: define double @splat_loads_with_internal_uses(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) { ; SSE-LABEL: @splat_loads_with_internal_uses( ; SSE-NEXT: entry: -; SSE-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 -; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8 -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP0]], [[TMP4]] -; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]] -; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 -; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 -; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] +; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 +; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8 +; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE1]] +; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]] ; SSE-NEXT: ret double [[RES]] ; ; AVX-LABEL: @splat_loads_with_internal_uses( @@ -715,18 +725,18 @@ define double @splat_loads_with_internal_uses(ptr %array1, ptr %array2, ptr %ptr ; AVX-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds double, ptr [[ARRAY2:%.*]], i64 1 ; AVX-NEXT: [[LD_2_0:%.*]] = load double, ptr [[ARRAY2]], align 8 ; AVX-NEXT: [[LD_2_1:%.*]] = load double, ptr [[GEP_2_1]], align 8 -; AVX-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 -; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]] +; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 +; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] ; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP0]], [[TMP5]] -; AVX-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] -; AVX-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], [[TMP2]] -; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 -; AVX-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 -; AVX-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] +; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] +; AVX-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE]] +; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; AVX-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]] ; AVX-NEXT: ret double [[RES]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll index 6000434ac1e95..6cf4a233c2d2d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll @@ -103,40 +103,57 @@ define i64 @test_3() #0 { ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[VAL:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB2:%.*]] ] -; CHECK-NEXT: [[VAL4:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> poison, i32 [[VAL4]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP1]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX7:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX8:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX9:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX10:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX11:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX12:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX13:%.*]] = mul i32 [[VAL4]], [[VAL4]] -; CHECK-NEXT: [[OP_RDX14:%.*]] = mul i32 [[OP_RDX]], [[OP_RDX1]] -; CHECK-NEXT: [[OP_RDX15:%.*]] = mul i32 [[OP_RDX2]], [[OP_RDX3]] -; CHECK-NEXT: [[OP_RDX16:%.*]] = mul i32 [[OP_RDX4]], [[OP_RDX5]] -; CHECK-NEXT: [[OP_RDX17:%.*]] = mul i32 [[OP_RDX6]], [[OP_RDX7]] -; CHECK-NEXT: [[OP_RDX18:%.*]] = mul i32 [[OP_RDX8]], [[OP_RDX9]] -; CHECK-NEXT: [[OP_RDX19:%.*]] = mul i32 [[OP_RDX10]], [[OP_RDX11]] -; CHECK-NEXT: [[OP_RDX20:%.*]] = mul i32 [[OP_RDX12]], [[OP_RDX13]] -; CHECK-NEXT: [[OP_RDX21:%.*]] = mul i32 [[OP_RDX14]], [[OP_RDX15]] -; CHECK-NEXT: [[OP_RDX22:%.*]] = mul i32 [[OP_RDX16]], [[OP_RDX17]] -; CHECK-NEXT: [[OP_RDX23:%.*]] = mul i32 [[OP_RDX18]], [[OP_RDX19]] -; CHECK-NEXT: [[OP_RDX24:%.*]] = mul i32 [[OP_RDX20]], [[VAL]] -; CHECK-NEXT: [[OP_RDX25:%.*]] = mul i32 [[OP_RDX21]], [[OP_RDX22]] -; CHECK-NEXT: [[OP_RDX26:%.*]] = mul i32 [[OP_RDX23]], [[OP_RDX24]] -; CHECK-NEXT: [[OP_RDX27:%.*]] = mul i32 [[OP_RDX25]], [[OP_RDX26]] -; CHECK-NEXT: [[VAL64:%.*]] = add i32 undef, [[OP_RDX27]] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ undef, [[BB1]] ], [ poison, [[BB2:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <32 x i32> poison, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <32 x i32> [[TMP3]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <32 x i32> [[TMP4]], i32 [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <32 x i32> [[TMP5]], i32 [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <32 x i32> [[TMP6]], i32 [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <32 x i32> [[TMP7]], i32 [[TMP2]], i32 5 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <32 x i32> [[TMP8]], i32 [[TMP2]], i32 6 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <32 x i32> [[TMP9]], i32 [[TMP2]], i32 7 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <32 x i32> [[TMP10]], i32 [[TMP2]], i32 8 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <32 x i32> [[TMP11]], i32 [[TMP2]], i32 9 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i32> [[TMP12]], i32 [[TMP2]], i32 10 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <32 x i32> [[TMP13]], i32 [[TMP2]], i32 11 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <32 x i32> [[TMP14]], i32 [[TMP2]], i32 12 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <32 x i32> [[TMP15]], i32 [[TMP2]], i32 13 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <32 x i32> [[TMP16]], i32 [[TMP2]], i32 14 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <32 x i32> [[TMP17]], i32 [[TMP2]], i32 15 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <32 x i32> [[TMP18]], i32 [[TMP2]], i32 16 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <32 x i32> [[TMP19]], i32 [[TMP2]], i32 17 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <32 x i32> [[TMP20]], i32 [[TMP2]], i32 18 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <32 x i32> [[TMP21]], i32 [[TMP2]], i32 19 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <32 x i32> [[TMP22]], i32 [[TMP2]], i32 20 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <32 x i32> [[TMP23]], i32 [[TMP2]], i32 21 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <32 x i32> [[TMP24]], i32 [[TMP2]], i32 22 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <32 x i32> [[TMP25]], i32 [[TMP2]], i32 23 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <32 x i32> [[TMP26]], i32 [[TMP2]], i32 24 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <32 x i32> [[TMP27]], i32 [[TMP2]], i32 25 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <32 x i32> [[TMP28]], i32 [[TMP2]], i32 26 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <32 x i32> [[TMP29]], i32 [[TMP2]], i32 27 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <32 x i32> [[TMP30]], i32 [[TMP2]], i32 28 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <32 x i32> [[TMP31]], i32 [[TMP2]], i32 29 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <32 x i32> [[TMP32]], i32 [[TMP2]], i32 30 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <32 x i32> [[TMP33]], i32 [[TMP2]], i32 31 +; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP34]]) +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP1]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX7:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]] +; CHECK-NEXT: [[OP_RDX8:%.*]] = mul i32 [[OP_RDX3]], [[OP_RDX4]] +; CHECK-NEXT: [[OP_RDX9:%.*]] = mul i32 [[OP_RDX5]], [[OP_RDX6]] +; CHECK-NEXT: [[OP_RDX10:%.*]] = mul i32 [[OP_RDX7]], [[OP_RDX8]] +; CHECK-NEXT: [[OP_RDX11:%.*]] = mul i32 [[OP_RDX9]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX12:%.*]] = mul i32 [[OP_RDX10]], [[OP_RDX11]] +; CHECK-NEXT: [[VAL64:%.*]] = add i32 undef, [[OP_RDX12]] ; CHECK-NEXT: [[VAL65:%.*]] = sext i32 [[VAL64]] to i64 ; CHECK-NEXT: ret i64 [[VAL65]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll index 4795ac6559203..32ad5fd80cf57 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll @@ -63,8 +63,11 @@ define i32 @foo(ptr noalias nocapture %A, ptr noalias nocapture %B, float %T) { ; PR41892 define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v2f32_store( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[X0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[X1]], i32 1 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; %x0 = extractelement <4 x float> %f, i64 0 @@ -92,10 +95,13 @@ define void @test_v4f32_v2f32_splat_store(<4 x float> %f, ptr %p){ define void @test_v4f32_v3f32_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v3f32_store( -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[F:%.*]], i64 2 +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1 +; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[F]], i64 2 ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[P]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[X0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[X1]], i32 1 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[P]], align 4 ; CHECK-NEXT: store float [[X2]], ptr [[P2]], align 4 ; CHECK-NEXT: ret void ; @@ -150,8 +156,10 @@ define void @test_v4f32_v4f32_store(<4 x float> %f, ptr %p){ define void @test_v4f32_v4f32_splat_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v4f32_splat_store( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: store <4 x float> [[SHUFFLE]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; %x0 = extractelement <4 x float> %f, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll index 8284fc404ca18..3850b42327857 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll @@ -71,19 +71,12 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load( -; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 -; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1 -; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2 -; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3 -; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 +; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> +; AVX512VL-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0:![0-9]+]] +; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], +; AVX512VL-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -181,11 +174,11 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado ; ; AVX512VL-LABEL: @gather_load_2( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x ptr> [[TMP4]], <4 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], -; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> +; AVX512VL-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], +; AVX512VL-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -311,20 +304,20 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; ; AVX512F-LABEL: @gather_load_3( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], -; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> +; AVX512F-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP5]], +; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_3( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], -; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> +; AVX512VL-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP5]], +; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load i32, ptr %1, align 4, !tbaa !2 @@ -468,20 +461,20 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; ; AVX512F-LABEL: @gather_load_4( ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 -; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], -; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> +; AVX512F-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP3]], +; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_4( ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], -; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> +; AVX512VL-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP3]], +; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %t5 = getelementptr inbounds i32, ptr %t0, i64 1 @@ -558,15 +551,14 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP18]], i64 3 ; SSE-NEXT: [[TMP27:%.*]] = fdiv <4 x float> [[TMP22]], [[TMP26]] ; SSE-NEXT: store <4 x float> [[TMP27]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; SSE-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; SSE-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 -; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; SSE-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 -; SSE-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 +; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 +; SSE-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 +; SSE-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 +; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 +; SSE-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 +; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 ; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4, !tbaa [[TBAA0]] @@ -574,16 +566,17 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP43:%.*]] = load float, ptr [[TMP35]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP44:%.*]] = insertelement <4 x float> poison, float [[TMP36]], i64 0 -; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP38]], i64 1 -; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP40]], i64 2 -; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP42]], i64 3 -; SSE-NEXT: [[TMP48:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0 -; SSE-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[TMP48]], float [[TMP39]], i64 1 -; SSE-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP41]], i64 2 -; SSE-NEXT: [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP43]], i64 3 -; SSE-NEXT: [[TMP52:%.*]] = fdiv <4 x float> [[TMP47]], [[TMP51]] -; SSE-NEXT: store <4 x float> [[TMP52]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP44:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0 +; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP39]], i64 1 +; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP41]], i64 2 +; SSE-NEXT: [[TMP48:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP43]], i64 3 +; SSE-NEXT: [[TMP49:%.*]] = insertelement <4 x float> poison, float [[TMP38]], i64 0 +; SSE-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP40]], i64 1 +; SSE-NEXT: [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP42]], i64 2 +; SSE-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP44]], i64 3 +; SSE-NEXT: [[TMP53:%.*]] = fdiv <4 x float> [[TMP48]], [[TMP52]] +; SSE-NEXT: store <4 x float> [[TMP53]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: ret void ; ; AVX-LABEL: @gather_load_div( @@ -692,24 +685,24 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; ; AVX512F-LABEL: @gather_load_div( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] -; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]] +; AVX512F-NEXT: store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_div( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] -; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]] +; AVX512VL-NEXT: store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load float, ptr %1, align 4, !tbaa !2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll index 7597cf14f7651..a1d08c987541b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll @@ -71,19 +71,12 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load( -; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 -; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 -; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 -; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i64 0 -; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1 -; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2 -; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3 -; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], -; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 +; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> +; AVX512VL-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0:![0-9]+]] +; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], +; AVX512VL-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -181,11 +174,11 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado ; ; AVX512VL-LABEL: @gather_load_2( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x ptr> [[TMP4]], <4 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], -; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> +; AVX512VL-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], +; AVX512VL-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -311,20 +304,20 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; ; AVX512F-LABEL: @gather_load_3( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], -; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> +; AVX512F-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP5]], +; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_3( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], -; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> +; AVX512VL-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP5]], +; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load i32, ptr %1, align 4, !tbaa !2 @@ -468,20 +461,20 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; ; AVX512F-LABEL: @gather_load_4( ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 -; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], -; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> +; AVX512F-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP3]], +; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_4( ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], -; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> +; AVX512VL-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP3]], +; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %t5 = getelementptr inbounds i32, ptr %t0, i64 1 @@ -558,15 +551,14 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP18]], i64 3 ; SSE-NEXT: [[TMP27:%.*]] = fdiv <4 x float> [[TMP22]], [[TMP26]] ; SSE-NEXT: store <4 x float> [[TMP27]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; SSE-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; SSE-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 -; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; SSE-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 -; SSE-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 +; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 +; SSE-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 +; SSE-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 +; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 +; SSE-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 +; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 ; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4, !tbaa [[TBAA0]] @@ -574,16 +566,17 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP43:%.*]] = load float, ptr [[TMP35]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP44:%.*]] = insertelement <4 x float> poison, float [[TMP36]], i64 0 -; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP38]], i64 1 -; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP40]], i64 2 -; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP42]], i64 3 -; SSE-NEXT: [[TMP48:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0 -; SSE-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[TMP48]], float [[TMP39]], i64 1 -; SSE-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP41]], i64 2 -; SSE-NEXT: [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP43]], i64 3 -; SSE-NEXT: [[TMP52:%.*]] = fdiv <4 x float> [[TMP47]], [[TMP51]] -; SSE-NEXT: store <4 x float> [[TMP52]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP44:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0 +; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP39]], i64 1 +; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP41]], i64 2 +; SSE-NEXT: [[TMP48:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP43]], i64 3 +; SSE-NEXT: [[TMP49:%.*]] = insertelement <4 x float> poison, float [[TMP38]], i64 0 +; SSE-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP40]], i64 1 +; SSE-NEXT: [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP42]], i64 2 +; SSE-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP44]], i64 3 +; SSE-NEXT: [[TMP53:%.*]] = fdiv <4 x float> [[TMP48]], [[TMP52]] +; SSE-NEXT: store <4 x float> [[TMP53]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: ret void ; ; AVX-LABEL: @gather_load_div( @@ -692,24 +685,24 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; ; AVX512F-LABEL: @gather_load_div( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] -; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]] +; AVX512F-NEXT: store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_div( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] -; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]] +; AVX512VL-NEXT: store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load float, ptr %1, align 4, !tbaa !2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll index d91470c50992e..ccc559d30b6b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll @@ -9,7 +9,7 @@ define dso_local <4 x float> @foo(<4 x i32> %0) { ; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i32> [[TMP6]] to <2 x float> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP8]], <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 6e0aafeacd309..94dfb4de871b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -95,16 +95,19 @@ define i1 @logical_or_fcmp(<4 x float> %x) { define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) { ; SSE-LABEL: @logical_and_icmp_diff_preds( ; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 +; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 ; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 +; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 ; SSE-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0 ; SSE-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer -; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP3]], i1 false +; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X3]], i32 0 +; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[X1]], i32 1 +; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], zeroinitializer +; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP4]], i1 false ; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP4]], i1 false +; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false ; SSE-NEXT: ret i1 [[S3]] ; ; AVX-LABEL: @logical_and_icmp_diff_preds( @@ -184,13 +187,16 @@ define i1 @mixed_logical_icmp(<4 x i32> %x) { define i1 @logical_and_icmp_subvec(<4 x i32> %x) { ; SSE-LABEL: @logical_and_icmp_subvec( -; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer +; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 +; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 +; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 +; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X1]], i32 0 +; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[X0]], i32 1 +; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], zeroinitializer ; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 0 -; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; SSE-NEXT: [[S1:%.*]] = select i1 [[TMP4]], i1 [[TMP3]], i1 false +; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; SSE-NEXT: [[S1:%.*]] = select i1 [[TMP5]], i1 [[TMP4]], i1 false ; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false ; SSE-NEXT: ret i1 [[S2]] ; @@ -222,13 +228,13 @@ define i1 @logical_and_icmp_subvec(<4 x i32> %x) { define i1 @logical_and_icmp_clamp(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) -; CHECK-NEXT: ret i1 [[TMP6]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]]) +; CHECK-NEXT: ret i1 [[TMP5]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -254,15 +260,15 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) { define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6 -; CHECK-NEXT: call void @use1(i1 [[TMP5]]) -; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]]) -; CHECK-NEXT: ret i1 [[TMP7]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6 +; CHECK-NEXT: call void @use1(i1 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) +; CHECK-NEXT: ret i1 [[TMP6]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -329,20 +335,27 @@ define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) { define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { ; CHECK-LABEL: @logical_and_icmp_clamp_v8i32( +; CHECK-NEXT: [[X0:%.*]] = extractelement <8 x i32> [[X:%.*]], i32 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <8 x i32> [[X]], i32 1 +; CHECK-NEXT: [[X2:%.*]] = extractelement <8 x i32> [[X]], i32 2 +; CHECK-NEXT: [[X3:%.*]] = extractelement <8 x i32> [[X]], i32 3 ; CHECK-NEXT: [[Y0:%.*]] = extractelement <8 x i32> [[Y:%.*]], i32 0 ; CHECK-NEXT: [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1 ; CHECK-NEXT: [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2 ; CHECK-NEXT: [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> , i32 [[Y0]], i32 4 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[Y1]], i32 5 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[Y2]], i32 6 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y3]], i32 7 -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP2]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = freeze <8 x i1> [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP8]]) -; CHECK-NEXT: ret i1 [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[X0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[X1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[X2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[X3]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> , i32 [[Y0]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y1]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[Y2]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[Y3]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = freeze <8 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP10]]) +; CHECK-NEXT: ret i1 [[TMP11]] ; %x0 = extractelement <8 x i32> %x, i32 0 %x1 = extractelement <8 x i32> %x, i32 1 @@ -373,18 +386,21 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) { ; SSE-LABEL: @logical_and_icmp_clamp_partial( ; SSE-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], +; SSE-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 1 +; SSE-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 0 +; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 +; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1 +; SSE-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[TMP5]], ; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42 -; SSE-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], -; SSE-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] -; SSE-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) -; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 -; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false -; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; SSE-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP8]], i1 [[C2]], i1 false -; SSE-NEXT: [[TMP9:%.*]] = freeze i1 [[OP_RDX]] -; SSE-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP9]], i1 [[OP_RDX1]], i1 false +; SSE-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[X]], +; SSE-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]] +; SSE-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]]) +; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 +; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP9]], i1 [[TMP10]], i1 false +; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1 +; SSE-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP11]], i1 [[C2]], i1 false +; SSE-NEXT: [[TMP12:%.*]] = freeze i1 [[OP_RDX]] +; SSE-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP12]], i1 [[OP_RDX1]], i1 false ; SSE-NEXT: ret i1 [[OP_RDX2]] ; ; AVX-LABEL: @logical_and_icmp_clamp_partial( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll index 3daebe50d724f..94aa4449c6797 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll @@ -11,9 +11,15 @@ define i64 @test() { ; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 0, [[BB2:%.*]] ], [ 0, [[BB1:%.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 0, [[BB1]] ] ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP4]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP4]], i32 3 +; CHECK-NEXT: [[TMP44:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP44]], i32 [[TMP4]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP4]], i32 7 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP7]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP8]], [[TMP4]] ; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[TMP4]], [[TMP4]] ; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[OP_RDX]], [[OP_RDX1]] ; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[OP_RDX2]], [[TMP]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll index d11eb609b12d0..e8ca9aeb8147c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -8,9 +8,9 @@ define void @fextr(ptr %ptr) { ; CHECK-NEXT: [[LD:%.*]] = load <8 x i16>, ptr undef, align 16 ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[LD]], [[TMP0]] -; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[PTR:%.*]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] +; CHECK-NEXT: store <8 x i16> [[TMP0]], ptr [[PTR:%.*]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll index 5cf0dc6a40a81..24334c43da684 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll @@ -3,19 +3,23 @@ define void @test(ptr %p, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ; CHECK-LABEL: @test( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[E:%.*]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[F:%.*]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[C:%.*]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[D:%.*]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP11]], [[TMP9]] -; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[S1:%.*]] = sub i32 [[A:%.*]], 1 +; CHECK-NEXT: [[S2:%.*]] = sub i32 [[B:%.*]], 1 +; CHECK-NEXT: [[M1:%.*]] = mul i32 [[S1]], [[E:%.*]] +; CHECK-NEXT: [[M2:%.*]] = mul i32 [[S2]], [[F:%.*]] +; CHECK-NEXT: [[M3:%.*]] = mul i32 [[S1]], [[C:%.*]] +; CHECK-NEXT: [[M4:%.*]] = mul i32 [[S2]], [[D:%.*]] +; CHECK-NEXT: [[A1:%.*]] = add i32 [[A]], [[M1]] +; CHECK-NEXT: [[A2:%.*]] = add i32 [[B]], [[M2]] +; CHECK-NEXT: [[A3:%.*]] = add i32 [[C]], [[M3]] +; CHECK-NEXT: [[A4:%.*]] = add i32 [[D]], [[M4]] +; CHECK-NEXT: store i32 [[A1]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 1 +; CHECK-NEXT: store i32 [[A2]], ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 2 +; CHECK-NEXT: store i32 [[A3]], ptr [[GEP1]], align 4 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 3 +; CHECK-NEXT: store i32 [[A4]], ptr [[GEP2]], align 4 ; CHECK-NEXT: ret void ; %s1 = sub i32 %a, 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll index 8dbb79cd81617..ec555f7e7aacc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-14 | FileCheck %s +; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-13 | FileCheck %s define void @test(i1 %c, ptr %arg) { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll index 3b4938f220fdd..c34a91f6be7ce 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll @@ -13,16 +13,16 @@ define void @test(ptr %a, ptr %b) { ; CHECK: for.body: ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr null, align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], zeroinitializer -; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[RESULT]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[SHUFFLE]], [[TMP6]] +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x float> [[SHUFFLE]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[SHUFFLE2]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[SHUFFLE1]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[TMP9]], zeroinitializer +; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[RESULT]], align 4 ; CHECK-NEXT: br label [[FOR_BODY]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index aa3c2be7dc9c2..666a2ec85c0fa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -12,22 +12,24 @@ define void @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP10:%.*]], [[BB3:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: ; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double> ; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 undef to double -; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[TMP3]], [[CONV2]] -; CHECK-NEXT: [[SUB1:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> , double [[SUB1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[ADD1]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x double> [[TMP6]], [[TMP4]] -; CHECK-NEXT: [[TMP8:%.*]] = fptrunc <4 x double> [[TMP6]] to <4 x float> -; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP2]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> , double [[CONV2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP10]] = phi <4 x float> [ [[TMP9]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] ; CHECK-NEXT: br label [[BB2]] ; entry: