diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 20b0360ae1daf..80e60ff0c8af1 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1182,6 +1182,7 @@ class BoUpSLP { MinBWs.clear(); InstrElementSize.clear(); UserIgnoreList = nullptr; + PostponedGathers.clear(); } unsigned getTreeSize() const { return VectorizableTree.size(); } @@ -2938,6 +2939,11 @@ class BoUpSLP { /// pre-gather them before. DenseMap EntryToLastInstruction; + /// List of gather nodes, depending on other gather/vector nodes, which should + /// be emitted after the vector instruction emission process to correctly + /// handle order of the vector instructions and shuffles. + SetVector PostponedGathers; + /// This POD struct describes one external user in the vectorized tree. struct ExternalUser { ExternalUser(Value *S, llvm::User *U, int L) @@ -6989,7 +6995,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, return TTI::TCC_Free; // Add broadcast for non-identity shuffle only. bool NeedShuffle = - VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof); + count(VL, *It) > 1 && + (VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof)); InstructionCost InsertCost = TTI->getVectorInstrCost( Instruction::InsertElement, VecTy, CostKind, NeedShuffle ? 0 : std::distance(VL.begin(), It), @@ -6999,7 +7006,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, TargetTransformInfo::SK_Broadcast, VecTy, /*Mask=*/std::nullopt, CostKind, /*Index=*/0, - /*SubTp=*/nullptr, /*Args=*/VL[0]) + /*SubTp=*/nullptr, /*Args=*/*It) : TTI::TCC_Free); } InstructionCost ReuseShuffleCost = 0; @@ -8396,7 +8403,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, // have a permutation of 2 input vectors. SmallVector> UsedTEs; DenseMap UsedValuesEntry; - for (Value *V : TE->Scalars) { + for (Value *V : VL) { if (isConstant(V)) continue; // Build a list of tree entries where V is used. @@ -8404,8 +8411,12 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, auto It = ValueToTEs.find(V); if (It != ValueToTEs.end()) VToTEs = It->second; - if (const TreeEntry *VTE = getTreeEntry(V)) + if (const TreeEntry *VTE = getTreeEntry(V)) { + Instruction &EntryUserInst = getLastInstructionInBundle(VTE); + if (&EntryUserInst == &UserInst || !CheckOrdering(&EntryUserInst)) + continue; VToTEs.insert(VTE); + } if (VToTEs.empty()) continue; if (UsedTEs.empty()) { @@ -8461,7 +8472,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, auto *It = find_if(FirstEntries, [=](const TreeEntry *EntryPtr) { return EntryPtr->isSame(VL) || EntryPtr->isSame(TE->Scalars); }); - if (It != FirstEntries.end()) { + if (It != FirstEntries.end() && (*It)->getVectorFactor() == VL.size()) { Entries.push_back(*It); std::iota(Mask.begin(), Mask.end(), 0); // Clear undef scalars. @@ -8503,10 +8514,18 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef VL, break; } } - // No 2 source vectors with the same vector factor - give up and do regular - // gather. - if (Entries.empty()) - return std::nullopt; + // No 2 source vectors with the same vector factor - just choose 2 with max + // index. + if (Entries.empty()) { + Entries.push_back( + *std::max_element(UsedTEs.front().begin(), UsedTEs.front().end(), + [](const TreeEntry *TE1, const TreeEntry *TE2) { + return TE1->Idx < TE2->Idx; + })); + Entries.push_back(SecondEntries.front()); + VF = std::max(Entries.front()->getVectorFactor(), + Entries.back()->getVectorFactor()); + } } bool IsSplatOrUndefs = isSplat(VL) || all_of(VL, UndefValue::classof); @@ -9377,14 +9396,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { }; if (E->State == TreeEntry::NeedToGather) { - if (E->Idx > 0) { - // We are in the middle of a vectorizable chain. We need to gather the - // scalars from the users. - Value *Vec = createBuildVector(E); - E->VectorizedValue = Vec; - return Vec; - } - if (E->getMainOp()) + if (E->getMainOp() && E->Idx == 0) setInsertPointAfterBundle(E); unsigned VF = E->getVectorFactor(); auto AdjustExtracts = [&](const TreeEntry *E, ArrayRef Mask) { @@ -9395,20 +9407,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { continue; auto *EI = cast(E->Scalars[I]); VecBase = EI->getVectorOperand(); - // If all users are vectorized - can delete the extractelement - // itself. - if (any_of(EI->users(), - [&](User *U) { return !ScalarToTreeEntry.count(U); })) - continue; - eraseInstruction(EI); + // TODO: EI can be erased, if all its users are vectorized. But need to + // emit shuffles for such extractelement instructions. } return VecBase; }; auto CreateShuffle = [&](Value *V1, Value *V2, ArrayRef Mask) { - if (V1->getType() != V2->getType()) { - unsigned VF1 = cast(V1->getType())->getNumElements(); - unsigned VF2 = cast(V2->getType())->getNumElements(); - unsigned VF = std::max(VF1, VF2); + unsigned VF1 = cast(V1->getType())->getNumElements(); + unsigned VF2 = cast(V2->getType())->getNumElements(); + unsigned VF = std::max(VF1, VF2); + if (VF1 != VF2) { SmallVector ExtMask(VF, UndefMaskElem); std::iota(ExtMask.begin(), std::next(ExtMask.begin(), std::min(VF1, VF2)), 0); @@ -9426,16 +9434,39 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } } } - Value *Vec = Builder.CreateShuffleVector(V1, V2, Mask); + const int Limit = Mask.size() * 2; + if (V1 == V2 && Mask.size() == VF && + all_of(Mask, [=](int Idx) { return Idx < Limit; }) && + (ShuffleVectorInst::isIdentityMask(Mask) || + (ShuffleVectorInst::isZeroEltSplatMask(Mask) && + isa(V1) && + cast(V1)->getShuffleMask() == Mask))) + return V1; + Value *Vec = V1 == V2 ? Builder.CreateShuffleVector(V1, Mask) + : Builder.CreateShuffleVector(V1, V2, Mask); if (auto *I = dyn_cast(Vec)) { GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } return Vec; }; + auto NeedToDelay = [=](const TreeEntry *E, + ArrayRef Deps) -> Value * { + // No need to delay emission if all deps are ready. + if (all_of(Deps, [](const TreeEntry *TE) { return TE->VectorizedValue; })) + return nullptr; + // Postpone gather emission, will be emitted after the end of the + // process to keep correct order. + auto *VecTy = FixedVectorType::get(E->Scalars.front()->getType(), + E->getVectorFactor()); + Value *Vec = Builder.CreateAlignedLoad( + VecTy, PoisonValue::get(VecTy->getPointerTo()), MaybeAlign()); + return Vec; + }; - SmallVector ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(), - E->ReuseShuffleIndices.end()); + SmallVector + ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(), + E->ReuseShuffleIndices.end()); SmallVector GatheredScalars(E->Scalars.begin(), E->Scalars.end()); // Build a mask out of the reorder indices and reorder scalars per this // mask. @@ -9450,40 +9481,53 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { std::optional GatherShuffle; SmallVector Entries; Type *ScalarTy = GatheredScalars.front()->getType(); - // Check for gathered extracts. - ExtractShuffle = tryToGatherExtractElements(GatheredScalars, ExtractMask); - SmallVector IgnoredVals; - if (UserIgnoreList) - IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end()); - bool Resized = false; - if (Value *VecBase = AdjustExtracts(E, ExtractMask)) - if (auto *VecBaseTy = dyn_cast(VecBase->getType())) - if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) { - Resized = true; - GatheredScalars.append(VF - GatheredScalars.size(), - PoisonValue::get(ScalarTy)); - } - // Gather extracts after we check for full matched gathers only. - if (ExtractShuffle || E->getOpcode() != Instruction::Load || - E->isAltShuffle() || - all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || - isSplat(E->Scalars) || - (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) - GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); - if (GatherShuffle) { - assert((Entries.size() == 1 || Entries.size() == 2) && - "Expected shuffle of 1 or 2 entries."); - if (!Resized) { - unsigned VF1 = Entries.front()->getVectorFactor(); - unsigned VF2 = Entries.back()->getVectorFactor(); - if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) - GatheredScalars.append(VF - GatheredScalars.size(), - PoisonValue::get(ScalarTy)); + if (!all_of(GatheredScalars, UndefValue::classof)) { + // Check for gathered extracts. + ExtractShuffle = tryToGatherExtractElements(GatheredScalars, ExtractMask); + SmallVector IgnoredVals; + if (UserIgnoreList) + IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end()); + bool Resized = false; + if (Value *VecBase = AdjustExtracts(E, ExtractMask)) + if (auto *VecBaseTy = dyn_cast(VecBase->getType())) + if (VF == VecBaseTy->getNumElements() && + GatheredScalars.size() != VF) { + Resized = true; + GatheredScalars.append(VF - GatheredScalars.size(), + PoisonValue::get(ScalarTy)); + } + // Gather extracts after we check for full matched gathers only. + if (ExtractShuffle || E->getOpcode() != Instruction::Load || + E->isAltShuffle() || + all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || + isSplat(E->Scalars) || + (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) { + GatherShuffle = + isGatherShuffledEntry(E, GatheredScalars, Mask, Entries); } - // Remove shuffled elements from list of gathers. - for (int I = 0, Sz = Mask.size(); I < Sz; ++I) { - if (Mask[I] != UndefMaskElem) - GatheredScalars[I] = PoisonValue::get(ScalarTy); + if (GatherShuffle) { + if (Value *Delayed = NeedToDelay(E, Entries)) { + E->VectorizedValue = Delayed; + // Delay emission of gathers which are not ready yet. + PostponedGathers.insert(E); + // Postpone gather emission, will be emitted after the end of the + // process to keep correct order. + return Delayed; + } + assert((Entries.size() == 1 || Entries.size() == 2) && + "Expected shuffle of 1 or 2 entries."); + if (!Resized) { + unsigned VF1 = Entries.front()->getVectorFactor(); + unsigned VF2 = Entries.back()->getVectorFactor(); + if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF) + GatheredScalars.append(VF - GatheredScalars.size(), + PoisonValue::get(ScalarTy)); + } + // Remove shuffled elements from list of gathers. + for (int I = 0, Sz = Mask.size(); I < Sz; ++I) { + if (Mask[I] != UndefMaskElem) + GatheredScalars[I] = PoisonValue::get(ScalarTy); + } } } if ((ExtractShuffle || GatherShuffle) && @@ -9515,6 +9559,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Vec1 = CreateShuffle(Vec1, Vec2, ExtractMask); else if (Vec1) Vec1 = CreateShuffle(Vec1, Vec1, ExtractMask); + else + Vec1 = PoisonValue::get( + FixedVectorType::get(ScalarTy, GatheredScalars.size())); } if (GatherShuffle) { Vec = CreateShuffle(Entries.front()->VectorizedValue, @@ -9532,10 +9579,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } else { Vec = Vec1; } + Vec = FinalShuffle(Vec, E); } else { - Vec = gather(E->Scalars); + Vec = createBuildVector(E); } - Vec = FinalShuffle(Vec, E); E->VectorizedValue = Vec; return Vec; } @@ -10177,6 +10224,27 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues, Builder.SetInsertPoint(ReductionRoot ? ReductionRoot : &F->getEntryBlock().front()); auto *VectorRoot = vectorizeTree(VectorizableTree[0].get()); + // Run through the list of postponed gathers and emit them, replacing the temp + // emitted allocas with actual vector instructions. + ArrayRef PostponedNodes = PostponedGathers.getArrayRef(); + for (const TreeEntry *E : PostponedNodes) { + auto *TE = const_cast(E); + if (auto *VecTE = getTreeEntry(TE->Scalars.front())) + if (VecTE->isSame(TE->UserTreeIndices.front().UserTE->getOperand( + TE->UserTreeIndices.front().EdgeIdx))) + // Found gather node which is absolutely the same as one of the + // vectorized nodes. It may happen after reordering. + continue; + auto *PrevVec = cast(TE->VectorizedValue); + TE->VectorizedValue = nullptr; + auto *UserI = + cast(TE->UserTreeIndices.front().UserTE->VectorizedValue); + Builder.SetInsertPoint(PrevVec); + Builder.SetCurrentDebugLocation(UserI->getDebugLoc()); + Value *Vec = vectorizeTree(TE); + PrevVec->replaceAllUsesWith(Vec); + eraseInstruction(PrevVec); + } // If the vectorized tree can be rewritten in a smaller type, we truncate the // vectorized root. InstCombine will then rewrite the entire expression. We diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll index 5e8616673bbcd..ee24944e5cec6 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll @@ -24,13 +24,10 @@ define <4 x float> @int_sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -220,13 +217,10 @@ define <4 x float> @exp_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -303,13 +297,10 @@ define <4 x float> @log_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -479,13 +470,10 @@ define <4 x float> @sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -521,13 +509,10 @@ define <4 x float> @cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -1012,13 +997,10 @@ define <4 x float> @int_cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll index 4df52503e1e2c..2bac49a58cd47 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll @@ -24,13 +24,10 @@ define <4 x float> @int_sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -220,13 +217,10 @@ define <4 x float> @exp_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -303,13 +297,10 @@ define <4 x float> @log_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -479,13 +470,10 @@ define <4 x float> @sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -521,13 +509,10 @@ define <4 x float> @cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: @@ -1012,13 +997,10 @@ define <4 x float> @int_cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1 -; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]]) -; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll index 1cdac15dba943..2af7efb3d81fe 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -13,16 +13,13 @@ define void @noop_extracts_first_2_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <2 x double>, ptr [[PTR_1:%.*]], align 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[V2_LANE_3:%.*]] = extractelement <4 x double> [[V_2]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V2_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[V_1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[V_1]], i32 0 +; CHECK-NEXT: call void @use(double [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 1 ; CHECK-NEXT: call void @use(double [[TMP3]]) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 1 -; CHECK-NEXT: call void @use(double [[TMP4]]) -; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -56,15 +53,12 @@ define void @extracts_first_2_lanes_different_vectors(ptr %ptr.1, ptr %ptr.2, pt ; CHECK-NEXT: [[V_3:%.*]] = load <2 x double>, ptr [[PTR_3:%.*]], align 8 ; CHECK-NEXT: [[V3_LANE_1:%.*]] = extractelement <2 x double> [[V_3]], i32 1 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V3_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[V_1]], <2 x double> [[V_3]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V3_LANE_1]]) -; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -98,16 +92,13 @@ define void @noop_extract_second_2_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <4 x double> [[V_1]], i32 2 ; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <4 x double> [[V_1]], i32 3 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -137,10 +128,8 @@ define void @extract_reverse_order(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <2 x double>, ptr [[PTR_1:%.*]], align 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 0 ; CHECK-NEXT: call void @use(double [[TMP3]]) @@ -178,16 +167,13 @@ define void @extract_lanes_1_and_2(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_1:%.*]] = extractelement <4 x double> [[V_1]], i32 1 ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <4 x double> [[V_1]], i32 2 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_1]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) -; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -223,23 +209,17 @@ define void @noop_extracts_existing_vector_4_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 ; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 ; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 -; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> poison, double [[V1_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[V1_LANE_0]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_1]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_0]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <9 x double> [[TMP7]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <9 x double> [[TMP4]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -279,24 +259,15 @@ define void @extracts_jumbled_4_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 ; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 -; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 -; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 -; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> poison, double [[V1_LANE_0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[V1_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[V1_LANE_1]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_3]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_0]], i32 3 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <9 x double> [[TMP8]], ptr [[PTR_1]], align 8 +; CHECK-NEXT: store <9 x double> [[TMP3]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -333,51 +304,22 @@ define void @noop_extracts_9_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-LABEL: @noop_extracts_9_lanes( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8 -; CHECK-NEXT: [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0 -; CHECK-NEXT: [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1 ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 -; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 -; CHECK-NEXT: [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4 ; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5 -; CHECK-NEXT: [[V1_LANE_6:%.*]] = extractelement <9 x double> [[V_1]], i32 6 -; CHECK-NEXT: [[V1_LANE_7:%.*]] = extractelement <9 x double> [[V_1]], i32 7 -; CHECK-NEXT: [[V1_LANE_8:%.*]] = extractelement <9 x double> [[V_1]], i32 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 -; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 -; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_3]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x double> [[TMP0]], double [[V1_LANE_4]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x double> [[TMP1]], double [[V1_LANE_5]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x double> [[TMP2]], double [[V1_LANE_6]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x double> [[TMP3]], double [[V1_LANE_7]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x double> [[TMP4]], double [[V1_LANE_8]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[V1_LANE_0]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[V1_LANE_1]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -450,47 +392,22 @@ define void @first_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-LABEL: @first_mul_chain_jumbled( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8 -; CHECK-NEXT: [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0 -; CHECK-NEXT: [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1 ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 -; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 -; CHECK-NEXT: [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4 ; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5 -; CHECK-NEXT: [[V1_LANE_6:%.*]] = extractelement <9 x double> [[V_1]], i32 6 -; CHECK-NEXT: [[V1_LANE_7:%.*]] = extractelement <9 x double> [[V_1]], i32 7 -; CHECK-NEXT: [[V1_LANE_8:%.*]] = extractelement <9 x double> [[V_1]], i32 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 ; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 -; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_4]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x double> [[TMP1]], double [[V1_LANE_6]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x double> [[TMP2]], double [[V1_LANE_5]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x double> [[TMP3]], double [[V1_LANE_8]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x double> [[TMP4]], double [[V1_LANE_7]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[V1_LANE_1]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[V1_LANE_0]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_1]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_0]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_2]], i32 2 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 -; CHECK-NEXT: [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fmul <8 x double> [[TMP4]], [[TMP1]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP22]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP6]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -563,51 +480,23 @@ define void @first_and_second_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-LABEL: @first_and_second_mul_chain_jumbled( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8 -; CHECK-NEXT: [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0 -; CHECK-NEXT: [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1 ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2 -; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3 ; CHECK-NEXT: [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4 -; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5 -; CHECK-NEXT: [[V1_LANE_6:%.*]] = extractelement <9 x double> [[V_1]], i32 6 -; CHECK-NEXT: [[V1_LANE_7:%.*]] = extractelement <9 x double> [[V_1]], i32 7 -; CHECK-NEXT: [[V1_LANE_8:%.*]] = extractelement <9 x double> [[V_1]], i32 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 -; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_4]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x double> [[TMP1]], double [[V1_LANE_5]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x double> [[TMP2]], double [[V1_LANE_6]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x double> [[TMP3]], double [[V1_LANE_8]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x double> [[TMP4]], double [[V1_LANE_7]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[V1_LANE_1]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[V1_LANE_0]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_7]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_6]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_1]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_0]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_3]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_2]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_5]], i32 7 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll index 6fd4b14eb799e..e6d80675635a2 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll @@ -244,8 +244,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) { ; GFX8-NEXT: bb: ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2 ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2 -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) ; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> @@ -291,11 +291,11 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) { ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) ; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> ; GFX8-NEXT: ret <4 x i16> [[INS_31]] diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll index 24fee6387beb5..e99be89ef9bba 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll @@ -244,8 +244,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) { ; GFX8-NEXT: bb: ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2 ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2 -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) ; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> @@ -291,11 +291,11 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) { ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) ; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> ; GFX8-NEXT: ret <4 x i16> [[INS_31]] diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll index 7471d0f7d329c..68295bb0d78e3 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll @@ -4,16 +4,10 @@ define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1) { ; CHECK-LABEL: @uadd_sat_v9i16_combine_vi16( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARG0_1:%.*]] = extractelement <9 x i16> undef, i64 7 -; CHECK-NEXT: [[ARG0_2:%.*]] = extractelement <9 x i16> [[ARG0:%.*]], i64 8 -; CHECK-NEXT: [[ARG1_1:%.*]] = extractelement <9 x i16> [[ARG1:%.*]], i64 7 -; CHECK-NEXT: [[ARG1_2:%.*]] = extractelement <9 x i16> [[ARG1]], i64 8 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> poison, i16 [[ARG0_1]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[TMP0]], i16 [[ARG0_2]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[ARG1_1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> [[TMP2]], i16 [[ARG1_2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP1]], <2 x i16> [[TMP3]]) -; CHECK-NEXT: ret <2 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0:%.*]], <9 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <9 x i16> [[ARG1:%.*]], <9 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +; CHECK-NEXT: ret <2 x i16> [[TMP2]] ; bb: %arg0.1 = extractelement <9 x i16> undef, i64 7 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll index f870fb3a9bc1f..eb4117bbf7209 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -1,34 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=slp-vectorizer -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck %s define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-LABEL: @phis( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 0 -; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1 -; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2 -; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1 -; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1 -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1 -; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ] -; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> -; CHECK-NEXT: ret <4 x half> [[TMP12]] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[O31:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> +; CHECK-NEXT: ret <4 x half> [[O31]] +; entry: %a0 = extractelement <4 x half> %in1, i64 0 %a1 = extractelement <4 x half> %in1, i64 1 @@ -59,32 +49,21 @@ bb1: define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-LABEL: @phis_reverse( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 0 -; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1 -; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2 -; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1 -; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1 -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1 -; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ] -; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> -; CHECK-NEXT: ret <4 x half> [[TMP12]] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[O31:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> +; CHECK-NEXT: ret <4 x half> [[O31]] +; entry: %a0 = extractelement <4 x half> %in1, i64 0 %a1 = extractelement <4 x half> %in1, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll index 7080b693e0ff6..cc1fa07d4c4f3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll @@ -4,14 +4,11 @@ define void @test(<16 x half> %v) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> [[V:%.*]], i32 4 -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> [[V]], i32 5 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[V:%.*]], <16 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x half> [[TMP0]] to <2 x float> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll index 244faa4b3e78e..a9e3aa33ae92d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -4,14 +4,11 @@ define void @test(<16 x half> %v) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> [[V:%.*]], i32 4 -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> [[V]], i32 5 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[V:%.*]], <16 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x half> [[TMP0]] to <2 x float> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll index a1babad830b88..ae8a52e669d35 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -7,18 +7,16 @@ define void @Test(i32) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP4]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP0:%.*]], [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> , i32 [[OP_RDX]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP0:%.*]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> , i32 [[OP_RDX]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = and <2 x i32> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], <2 x i32> ; CHECK-NEXT: br label [[LOOP]] ; ; FORCE_REDUCTION-LABEL: @Test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll index c801cd296917f..ee217c07895ee 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll @@ -11,12 +11,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; SSE-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SSE-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -32,12 +32,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; SLM-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SLM-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -53,12 +53,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll index b4d525fc4db5a..0f7d3b5647ff9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll @@ -11,12 +11,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; SSE-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SSE-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -32,12 +32,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; SLM-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SLM-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -53,12 +53,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll index 40678b9c87829..11eddced91a12 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll @@ -163,7 +163,7 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: @sitofp_4i32_8i16( ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> ; CHECK-NEXT: [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[R71]] @@ -201,11 +201,11 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll index b09c142cca50c..59a77598b6640 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -163,7 +163,7 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: @sitofp_4i32_8i16( ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> ; CHECK-NEXT: [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[R71]] @@ -201,11 +201,11 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll index 2dcaec8d11023..0f59f595f911e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll @@ -98,7 +98,7 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) { ; SLM-LABEL: @fmul_fdiv_v4f32_const( ; SLM-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i64 2 ; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], ; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00 ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll index 99e673ee52f54..ef13f7956e16d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll @@ -98,7 +98,7 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) { ; SLM-LABEL: @fmul_fdiv_v4f32_const( ; SLM-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i64 2 ; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], ; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00 ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll index 19e7ed8d52117..9cf0e1184c4fe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -170,17 +170,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[R71]] @@ -236,8 +236,8 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i64 7 ; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i64 6 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> @@ -253,13 +253,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: ret <8 x i32> [[R7]] ; ; SLM-LABEL: @ashr_lshr_shl_v8i32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -267,13 +267,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: ret <8 x i32> [[R71]] ; ; AVX1-LABEL: @ashr_lshr_shl_v8i32( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX1-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX1-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -281,13 +281,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; AVX1-NEXT: ret <8 x i32> [[R71]] ; ; AVX2-LABEL: @ashr_lshr_shl_v8i32( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -295,13 +295,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; AVX2-NEXT: ret <8 x i32> [[R71]] ; ; AVX512-LABEL: @ashr_lshr_shl_v8i32( -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX512-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX512-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -443,10 +443,10 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX2-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 1 ; AVX2-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 ; AVX2-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX2-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i64 1 ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -460,10 +460,10 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX512-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 1 ; AVX512-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 ; AVX512-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX512-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i64 1 ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -503,11 +503,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; CHECK-LABEL: @add_sub_v8i32_splat( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[SHUFFLE]], [[A:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[SHUFFLE]], [[A]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP5]] ; %a0 = extractelement <8 x i32> %a, i32 0 %a1 = extractelement <8 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll index c07c4b794f150..976be7d7f1c7e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -170,17 +170,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[R71]] @@ -236,8 +236,8 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i64 7 ; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i64 6 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i64 7 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> @@ -253,13 +253,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: ret <8 x i32> [[R7]] ; ; SLM-LABEL: @ashr_lshr_shl_v8i32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -267,13 +267,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: ret <8 x i32> [[R71]] ; ; AVX1-LABEL: @ashr_lshr_shl_v8i32( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX1-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX1-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -281,13 +281,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; AVX1-NEXT: ret <8 x i32> [[R71]] ; ; AVX2-LABEL: @ashr_lshr_shl_v8i32( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -295,13 +295,13 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; AVX2-NEXT: ret <8 x i32> [[R71]] ; ; AVX512-LABEL: @ashr_lshr_shl_v8i32( -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX512-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX512-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -443,10 +443,10 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX2-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 1 ; AVX2-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 ; AVX2-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX2-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> , i32 [[AB1]], i64 1 ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -460,10 +460,10 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX512-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 1 ; AVX512-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 ; AVX512-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX512-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> , i32 [[AB1]], i64 1 ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -503,11 +503,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; CHECK-LABEL: @add_sub_v8i32_splat( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[SHUFFLE]], [[A:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[SHUFFLE]], [[A]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[A:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP2]], [[A]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP5]] ; %a0 = extractelement <8 x i32> %a, i32 0 %a1 = extractelement <8 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll index a1e47e5b31aa0..c556ea7491d48 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll @@ -607,49 +607,25 @@ define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) { ; SSE-NEXT: ret <8 x double> [[TMP1]] ; ; SLM-LABEL: @buildvector_div_8f64( -; SLM-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 -; SLM-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 -; SLM-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 -; SLM-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 -; SLM-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 -; SLM-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 -; SLM-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 -; SLM-NEXT: [[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1 -; SLM-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2 -; SLM-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3 -; SLM-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4 -; SLM-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 -; SLM-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 -; SLM-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 -; SLM-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 -; SLM-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1 -; SLM-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 -; SLM-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1 -; SLM-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]] -; SLM-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0 -; SLM-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1 -; SLM-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[B2]], i32 0 -; SLM-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1 -; SLM-NEXT: [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]] -; SLM-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[A4]], i32 0 -; SLM-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[A5]], i32 1 -; SLM-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[B4]], i32 0 -; SLM-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[B5]], i32 1 -; SLM-NEXT: [[TMP15:%.*]] = fdiv <2 x double> [[TMP12]], [[TMP14]] -; SLM-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[A6]], i32 0 -; SLM-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[A7]], i32 1 -; SLM-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[B6]], i32 0 -; SLM-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[B7]], i32 1 -; SLM-NEXT: [[TMP20:%.*]] = fdiv <2 x double> [[TMP17]], [[TMP19]] -; SLM-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <8 x i32> -; SLM-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP23]], <8 x i32> -; SLM-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP24]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fdiv <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP9:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP8]] +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP12:%.*]] = fdiv <2 x double> [[TMP10]], [[TMP11]] +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x i32> +; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP15]], <8 x i32> +; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP16]], <8 x i32> ; SLM-NEXT: ret <8 x double> [[R73]] ; ; AVX-LABEL: @buildvector_div_8f64( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll index 8022b09f05c0b..f331735558b0a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll @@ -607,49 +607,25 @@ define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) { ; SSE-NEXT: ret <8 x double> [[TMP1]] ; ; SLM-LABEL: @buildvector_div_8f64( -; SLM-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 -; SLM-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 -; SLM-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 -; SLM-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 -; SLM-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 -; SLM-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 -; SLM-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 -; SLM-NEXT: [[B0:%.*]] = extractelement <8 x double> [[B:%.*]], i32 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <8 x double> [[B]], i32 1 -; SLM-NEXT: [[B2:%.*]] = extractelement <8 x double> [[B]], i32 2 -; SLM-NEXT: [[B3:%.*]] = extractelement <8 x double> [[B]], i32 3 -; SLM-NEXT: [[B4:%.*]] = extractelement <8 x double> [[B]], i32 4 -; SLM-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 -; SLM-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 -; SLM-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 -; SLM-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 -; SLM-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1 -; SLM-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 -; SLM-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1 -; SLM-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]] -; SLM-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0 -; SLM-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1 -; SLM-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[B2]], i32 0 -; SLM-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1 -; SLM-NEXT: [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]] -; SLM-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[A4]], i32 0 -; SLM-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[A5]], i32 1 -; SLM-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[B4]], i32 0 -; SLM-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[B5]], i32 1 -; SLM-NEXT: [[TMP15:%.*]] = fdiv <2 x double> [[TMP12]], [[TMP14]] -; SLM-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[A6]], i32 0 -; SLM-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[A7]], i32 1 -; SLM-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[B6]], i32 0 -; SLM-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[B7]], i32 1 -; SLM-NEXT: [[TMP20:%.*]] = fdiv <2 x double> [[TMP17]], [[TMP19]] -; SLM-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <8 x i32> -; SLM-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP23]], <8 x i32> -; SLM-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP24]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fdiv <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP9:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP8]] +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP12:%.*]] = fdiv <2 x double> [[TMP10]], [[TMP11]] +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x i32> +; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP15]], <8 x i32> +; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP16]], <8 x i32> ; SLM-NEXT: ret <8 x double> [[R73]] ; ; AVX-LABEL: @buildvector_div_8f64( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll index 65817d74ee54f..18d9f8c903c5b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -5,19 +5,19 @@ define void @exceed(double %0, double %1) { ; CHECK-LABEL: @exceed( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP5]], undef +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef ; CHECK-NEXT: [[IXX0:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX1:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX2:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX3:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX4:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX5:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP5]], undef +; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef ; CHECK-NEXT: [[IXX10:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX11:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX12:%.*]] = fsub double undef, undef @@ -27,16 +27,15 @@ define void @exceed(double %0, double %1) { ; CHECK-NEXT: [[IXX20:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX21:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 -; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP6]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP4]], <2 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[TMP12]], undef +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP5]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <2 x double> [[TMP13]], undef ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] ; CHECK-NEXT: ] @@ -45,7 +44,7 @@ define void @exceed(double %0, double %1) { ; CHECK: bb2: ; CHECK-NEXT: br label [[LABEL]] ; CHECK: label: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x double> [ [[TMP10]], [[BB1]] ], [ [[TMP13]], [[BB2]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP14]], [[BB2]] ] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index 5b7935ba02c32..565a2d21d0674 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -33,13 +33,9 @@ define void @main() #0 { ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> , [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: store <2 x double> [[TMP3]], ptr undef, align 8 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> , double [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> , double [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[TMP7]] -; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP1]] +; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8 ; CHECK-NEXT: unreachable ; CHECK: cond.true63.us: ; CHECK-NEXT: unreachable diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll index 04d190bccfea4..74b9f31973d12 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll @@ -3,12 +3,9 @@ define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @g( -; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[Y1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP2]], [[TMP2]] -; CHECK-NEXT: ret <2 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]] +; CHECK-NEXT: ret <2 x i8> [[TMP2]] ; %x0 = extractelement <2 x i8> %x, i32 0 %y1 = extractelement <2 x i8> %y, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll index b6535f9f2e696..9276efb6e9cdc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll @@ -3,12 +3,9 @@ define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @g( -; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0 -; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[Y1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP2]], [[TMP2]] -; CHECK-NEXT: ret <2 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]] +; CHECK-NEXT: ret <2 x i8> [[TMP2]] ; %x0 = extractelement <2 x i8> %x, i32 0 %y1 = extractelement <2 x i8> %y, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll index 7de9f4a3aee7c..8fde48dd3bb5a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll @@ -28,8 +28,8 @@ define void @fextr1(ptr %ptr) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, ptr undef, align 16 ; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[LD]], -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: store <2 x double> [[SHUFFLE]], ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -48,12 +48,9 @@ define void @fextr2(ptr %ptr) { ; CHECK-LABEL: @fextr2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <4 x double>, ptr undef, align 32 -; CHECK-NEXT: [[V0:%.*]] = extractelement <4 x double> [[LD]], i32 0 -; CHECK-NEXT: [[V1:%.*]] = extractelement <4 x double> [[LD]], i32 1 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], -; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[LD]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[TMP0]], +; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll index 33959dc48f296..35f521fbe8113 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll @@ -14,10 +14,8 @@ define float @multi_uses(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @multi_uses( -; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Y1]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X:%.*]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll index 6bff6d9b45a3d..f5718e04df9ad 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -82,23 +82,19 @@ define float @f_used_twice_in_tree(<2 x float> %x) { ; CHECK-NEXT: ret float [[ADD]] ; ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH1-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; THRESH1-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]] -; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] +; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] +; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( -; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH2-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; THRESH2-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]] -; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] +; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] +; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH2-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll index 482334e510f64..e1c816fd383d7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-extractelements-different-bbs.ll @@ -6,32 +6,29 @@ define i32 @foo(i32 %a) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[A:%.*]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP0]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1 -; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[OP_RDX12:%.*]] = add i32 [[OP_RDX11]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 +; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[OP_RDX10]], 0 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX12]], [[BB1]] ], [ 0, [[BB2:%.*]] ] +; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX11]], [[BB1]] ], [ 0, [[BB2:%.*]] ] ; CHECK-NEXT: ret i32 0 ; CHECK: bb4: -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE8]] -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) -; CHECK-NEXT: [[OP_RDX9:%.*]] = add i32 [[TMP11]], 0 -; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[OP_RDX9]], [[TMP2]] -; CHECK-NEXT: ret i32 [[OP_RDX10]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP2]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) +; CHECK-NEXT: [[OP_RDX8:%.*]] = add i32 [[TMP10]], 0 +; CHECK-NEXT: [[OP_RDX9:%.*]] = add i32 [[OP_RDX8]], [[TMP3]] +; CHECK-NEXT: ret i32 [[OP_RDX9]] ; CHECK: bb5: ; CHECK-NEXT: br label [[BB4:%.*]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll index 822db3a2f6ea5..f8fbcc58295a1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll @@ -143,8 +143,8 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { ; PR41892 define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v2f32_store( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void @@ -220,8 +220,8 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; CHECK-NEXT: [[R021:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R021]], double [[R3]], i64 3 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 ; CHECK-NEXT: ret <4 x double> [[R03]] ; %a0 = extractelement <4 x double> %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll index 8fd490a9e38eb..5507cc9404861 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -143,8 +143,8 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { ; PR41892 define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v2f32_store( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void @@ -220,8 +220,8 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; CHECK-NEXT: [[R021:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R021]], double [[R3]], i64 3 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 ; CHECK-NEXT: ret <4 x double> [[R03]] ; %a0 = extractelement <4 x double> %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll index 24b0e545bd687..d1350bc029019 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll @@ -3,22 +3,13 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; CHECK-LABEL: @simple_select( -; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 -; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 -; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 -; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 -; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: ret <4 x float> [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP6]] ; %c0 = extractelement <4 x i32> %c, i32 0 %c1 = extractelement <4 x i32> %c, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll index e266ce02962b1..654121553b4cf 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -146,10 +146,10 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 ; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0 ; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> ; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 ; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> ; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] ; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] ; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 @@ -273,37 +273,19 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32 ; Unused insertelement define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_no_users( -; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 -; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 -; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 -; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 -; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 -; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 -; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 -; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 -; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[C3]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 4be025a7324a7..d855dd6292c00 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -180,10 +180,10 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 ; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0 ; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> ; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 ; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> ; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] ; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] ; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 @@ -307,37 +307,19 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32 ; Unused insertelement define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_no_users( -; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 -; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 -; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 -; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 -; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 -; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 -; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 -; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 -; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[C3]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll index 6756d9aa2101e..3fc6fe9e60611 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll @@ -13,11 +13,11 @@ define <4 x double> @test(ptr %p2, double %i1754, double %i1781, double %i1778) ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[I1778:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[I1781]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[I1772]], i32 3 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <4 x double> [[TMP3]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> , double [[I1797]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x double> [[TMP4]], [[TMP5]] -; CHECK-NEXT: ret <4 x double> [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> , double [[I1797]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x double> [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret <4 x double> [[TMP7]] ; entry: %i1771 = getelementptr inbounds double, ptr %p2, i64 54 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll index 402e111c98598..9ed021448e020 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -10,10 +10,10 @@ define void @foo() personality ptr @bar { ; CHECK: bb2.loopexit: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP10:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP8:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] ; CHECK-NEXT: ret void ; CHECK: bb3: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]] ; CHECK: bb4: @@ -21,32 +21,30 @@ define void @foo() personality ptr @bar { ; CHECK: bb5: ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ , [[BB8:%.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP5]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ , [[BB8:%.*]] ] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb7: ; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ] -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] ; CHECK: bb8: ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[BB10]] ], [ [[TMP12:%.*]], [[BB12]] ] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP10]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: -; CHECK-NEXT: [[TMP11]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] +; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP12]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ] +; CHECK-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll index 1ed2841a770db..469b15d074186 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -23,14 +23,14 @@ define void @lookahead_basic(ptr %array) { ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, ptr [[ARRAY:%.*]], i64 2 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 4 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 6 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr [[IDX4]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[IDX6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP8]] -; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[IDX4]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX6]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP4]] +; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[ARRAY]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -85,12 +85,12 @@ define void @lookahead_alt1(ptr %array) { ; CHECK-NEXT: [[IDX5:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 5 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 6 ; CHECK-NEXT: [[IDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP4]] -; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP2]] +; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[ARRAY]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -139,18 +139,18 @@ define void @lookahead_alt2(ptr %array) { ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, ptr [[ARRAY:%.*]], i64 2 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 4 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 6 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr [[IDX4]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[IDX6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP12:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[TMP10]], [[TMP13]] -; CHECK-NEXT: store <2 x double> [[TMP14]], ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[IDX2]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[IDX4]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IDX6]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]] +; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[ARRAY]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -207,22 +207,22 @@ define void @lookahead_external_uses(ptr %A, ptr %B, ptr %C, ptr %D, ptr %S, ptr ; CHECK-NEXT: [[B0:%.*]] = load double, ptr [[B]], align 8 ; CHECK-NEXT: [[C0:%.*]] = load double, ptr [[C:%.*]], align 8 ; CHECK-NEXT: [[D0:%.*]] = load double, ptr [[D:%.*]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[B2:%.*]] = load double, ptr [[IDXB2]], align 8 ; CHECK-NEXT: [[A2:%.*]] = load double, ptr [[IDXA2]], align 8 ; CHECK-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A2]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]] -; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[S:%.*]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; CHECK-NEXT: store double [[TMP12]], ptr [[EXT1:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B2]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[B1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP8]] +; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[S:%.*]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP0]], i32 1 +; CHECK-NEXT: store double [[TMP10]], ptr [[EXT1:%.*]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -285,24 +285,24 @@ define void @lookahead_limit_users_budget(ptr %A, ptr %B, ptr %C, ptr %D, ptr %S ; CHECK-NEXT: [[B0:%.*]] = load double, ptr [[B]], align 8 ; CHECK-NEXT: [[C0:%.*]] = load double, ptr [[C:%.*]], align 8 ; CHECK-NEXT: [[D0:%.*]] = load double, ptr [[D:%.*]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[B2:%.*]] = load double, ptr [[IDXB2]], align 8 ; CHECK-NEXT: [[A2:%.*]] = load double, ptr [[IDXA2]], align 8 ; CHECK-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A2]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]] -; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[S:%.*]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; CHECK-NEXT: store double [[TMP12]], ptr [[EXT1:%.*]], align 8 -; CHECK-NEXT: store double [[TMP12]], ptr [[EXT2:%.*]], align 8 -; CHECK-NEXT: store double [[TMP12]], ptr [[EXT3:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B2]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[B1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP8]] +; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[S:%.*]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP0]], i32 1 +; CHECK-NEXT: store double [[TMP10]], ptr [[EXT1:%.*]], align 8 +; CHECK-NEXT: store double [[TMP10]], ptr [[EXT2:%.*]], align 8 +; CHECK-NEXT: store double [[TMP10]], ptr [[EXT3:%.*]], align 8 ; CHECK-NEXT: store double [[B1]], ptr [[EXT4:%.*]], align 8 ; CHECK-NEXT: store double [[B1]], ptr [[EXT5:%.*]], align 8 ; CHECK-NEXT: ret void @@ -358,13 +358,13 @@ declare double @_ZN1i2axEv() define void @lookahead_crash(ptr %A, ptr %S, ptr %Arg0) { ; CHECK-LABEL: @lookahead_crash( -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8 ; CHECK-NEXT: [[C0:%.*]] = call double @_ZN1i2ayEv(ptr [[ARG0:%.*]]) ; CHECK-NEXT: [[C1:%.*]] = call double @_ZN1i2axEv() -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[C1]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[S:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[S:%.*]], align 8 ; CHECK-NEXT: ret void ; %IdxA1 = getelementptr inbounds double, ptr %A, i64 1 @@ -393,13 +393,13 @@ define void @ChecksExtractScores(ptr %storeArray, ptr %array, ptr %vecPtr1, ptr ; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x double>, ptr [[VECPTR1:%.*]], align 4 ; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, ptr [[VECPTR2:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[LOADVEC]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[LOADVEC2]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[STOREARRAY:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[LOADVEC]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[LOADVEC2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] +; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[STOREARRAY:%.*]], align 8 ; CHECK-NEXT: ret void ; %idx1 = getelementptr inbounds double, ptr %array, i64 1 @@ -576,22 +576,16 @@ define void @ChecksExtractScores_different_vectors(ptr %storeArray, ptr %array, ; SSE-LABEL: @ChecksExtractScores_different_vectors( ; SSE-NEXT: [[LOADVEC:%.*]] = load <2 x double>, ptr [[VECPTR1:%.*]], align 4 ; SSE-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, ptr [[VECPTR2:%.*]], align 4 -; SSE-NEXT: [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0 -; SSE-NEXT: [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1 ; SSE-NEXT: [[LOADVEC3:%.*]] = load <2 x double>, ptr [[VECPTR3:%.*]], align 4 ; SSE-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, ptr [[VECPTR4:%.*]], align 4 -; SSE-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0 -; SSE-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1 -; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAY:%.*]], align 4 -; SSE-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRB0]], i32 1 -; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]] -; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 -; SSE-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1 -; SSE-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP2]] -; SSE-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP8]] -; SSE-NEXT: store <2 x double> [[TMP9]], ptr [[STOREARRAY:%.*]], align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY:%.*]], align 4 +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[LOADVEC2]], <2 x double> [[LOADVEC3]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], [[TMP1]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[LOADVEC]], <2 x double> [[LOADVEC4]], <2 x i32> +; SSE-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], [[TMP1]] +; SSE-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP4]], [[TMP6]] +; SSE-NEXT: store <2 x double> [[TMP7]], ptr [[STOREARRAY:%.*]], align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @ChecksExtractScores_different_vectors( @@ -600,22 +594,16 @@ define void @ChecksExtractScores_different_vectors(ptr %storeArray, ptr %array, ; AVX-NEXT: [[LOADA1:%.*]] = load double, ptr [[IDX1]], align 4 ; AVX-NEXT: [[LOADVEC:%.*]] = load <2 x double>, ptr [[VECPTR1:%.*]], align 4 ; AVX-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, ptr [[VECPTR2:%.*]], align 4 -; AVX-NEXT: [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0 -; AVX-NEXT: [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1 ; AVX-NEXT: [[LOADVEC3:%.*]] = load <2 x double>, ptr [[VECPTR3:%.*]], align 4 ; AVX-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, ptr [[VECPTR4:%.*]], align 4 -; AVX-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0 -; AVX-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1 -; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 -; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[EXTRA1]], i32 1 -; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE]] -; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[EXTRB1]], i32 1 -; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 -; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE1]] +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[LOADVEC]], <2 x double> [[LOADVEC2]], <2 x i32> +; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[LOADVEC3]], <2 x double> [[LOADVEC4]], <2 x i32> +; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 +; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]] ; AVX-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]] ; AVX-NEXT: store <2 x double> [[TMP9]], ptr [[STOREARRAY:%.*]], align 8 ; AVX-NEXT: ret void @@ -651,12 +639,13 @@ define void @ChecksExtractScores_different_vectors(ptr %storeArray, ptr %array, define double @splat_loads(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) { ; SSE-LABEL: @splat_loads( ; SSE-NEXT: entry: -; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 -; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8 -; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8 +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP0]], [[TMP4]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] ; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 ; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 ; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] @@ -667,17 +656,17 @@ define double @splat_loads(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) { ; AVX-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds double, ptr [[ARRAY2:%.*]], i64 1 ; AVX-NEXT: [[LD_2_0:%.*]] = load double, ptr [[ARRAY2]], align 8 ; AVX-NEXT: [[LD_2_1:%.*]] = load double, ptr [[GEP_2_1]], align 8 -; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 -; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; AVX-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 +; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]] ; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] -; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] -; AVX-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 -; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP0]], [[TMP5]] +; AVX-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] +; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP8]], [[TMP9]] ; AVX-NEXT: ret double [[ADD3]] ; entry: @@ -707,17 +696,18 @@ entry: define double @splat_loads_with_internal_uses(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) { ; SSE-LABEL: @splat_loads_with_internal_uses( ; SSE-NEXT: entry: -; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 -; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8 -; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> -; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE1]] -; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 -; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8 +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP0]], [[TMP4]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]] +; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 +; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 +; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] ; SSE-NEXT: ret double [[RES]] ; ; AVX-LABEL: @splat_loads_with_internal_uses( @@ -725,18 +715,18 @@ define double @splat_loads_with_internal_uses(ptr %array1, ptr %array2, ptr %ptr ; AVX-NEXT: [[GEP_2_1:%.*]] = getelementptr inbounds double, ptr [[ARRAY2:%.*]], i64 1 ; AVX-NEXT: [[LD_2_0:%.*]] = load double, ptr [[ARRAY2]], align 8 ; AVX-NEXT: [[LD_2_1:%.*]] = load double, ptr [[GEP_2_1]], align 8 -; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 -; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; AVX-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8 +; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP0]], [[TMP2]] ; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] -; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] -; AVX-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE]] -; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 -; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; AVX-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]] +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP0]], [[TMP5]] +; AVX-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] +; AVX-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], [[TMP2]] +; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 +; AVX-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] ; AVX-NEXT: ret double [[RES]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll index 6cf4a233c2d2d..6000434ac1e95 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll @@ -103,57 +103,40 @@ define i64 @test_3() #0 { ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ undef, [[BB1]] ], [ poison, [[BB2:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <32 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <32 x i32> [[TMP3]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <32 x i32> [[TMP4]], i32 [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <32 x i32> [[TMP5]], i32 [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <32 x i32> [[TMP6]], i32 [[TMP2]], i32 4 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <32 x i32> [[TMP7]], i32 [[TMP2]], i32 5 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <32 x i32> [[TMP8]], i32 [[TMP2]], i32 6 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <32 x i32> [[TMP9]], i32 [[TMP2]], i32 7 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <32 x i32> [[TMP10]], i32 [[TMP2]], i32 8 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <32 x i32> [[TMP11]], i32 [[TMP2]], i32 9 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i32> [[TMP12]], i32 [[TMP2]], i32 10 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <32 x i32> [[TMP13]], i32 [[TMP2]], i32 11 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <32 x i32> [[TMP14]], i32 [[TMP2]], i32 12 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <32 x i32> [[TMP15]], i32 [[TMP2]], i32 13 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <32 x i32> [[TMP16]], i32 [[TMP2]], i32 14 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <32 x i32> [[TMP17]], i32 [[TMP2]], i32 15 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <32 x i32> [[TMP18]], i32 [[TMP2]], i32 16 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <32 x i32> [[TMP19]], i32 [[TMP2]], i32 17 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <32 x i32> [[TMP20]], i32 [[TMP2]], i32 18 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <32 x i32> [[TMP21]], i32 [[TMP2]], i32 19 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <32 x i32> [[TMP22]], i32 [[TMP2]], i32 20 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <32 x i32> [[TMP23]], i32 [[TMP2]], i32 21 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <32 x i32> [[TMP24]], i32 [[TMP2]], i32 22 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <32 x i32> [[TMP25]], i32 [[TMP2]], i32 23 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <32 x i32> [[TMP26]], i32 [[TMP2]], i32 24 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <32 x i32> [[TMP27]], i32 [[TMP2]], i32 25 -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <32 x i32> [[TMP28]], i32 [[TMP2]], i32 26 -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <32 x i32> [[TMP29]], i32 [[TMP2]], i32 27 -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <32 x i32> [[TMP30]], i32 [[TMP2]], i32 28 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <32 x i32> [[TMP31]], i32 [[TMP2]], i32 29 -; CHECK-NEXT: [[TMP33:%.*]] = insertelement <32 x i32> [[TMP32]], i32 [[TMP2]], i32 30 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <32 x i32> [[TMP33]], i32 [[TMP2]], i32 31 -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP34]]) -; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP1]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP35]], [[TMP36]] -; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TMP2]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[TMP2]], [[TMP2]] -; CHECK-NEXT: [[OP_RDX7:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]] -; CHECK-NEXT: [[OP_RDX8:%.*]] = mul i32 [[OP_RDX3]], [[OP_RDX4]] -; CHECK-NEXT: [[OP_RDX9:%.*]] = mul i32 [[OP_RDX5]], [[OP_RDX6]] -; CHECK-NEXT: [[OP_RDX10:%.*]] = mul i32 [[OP_RDX7]], [[OP_RDX8]] -; CHECK-NEXT: [[OP_RDX11:%.*]] = mul i32 [[OP_RDX9]], [[TMP2]] -; CHECK-NEXT: [[OP_RDX12:%.*]] = mul i32 [[OP_RDX10]], [[OP_RDX11]] -; CHECK-NEXT: [[VAL64:%.*]] = add i32 undef, [[OP_RDX12]] +; CHECK-NEXT: [[VAL:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB2:%.*]] ] +; CHECK-NEXT: [[VAL4:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB2]] ] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> poison, i32 [[VAL4]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP1]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX7:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX8:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX9:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX10:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX11:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX12:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX13:%.*]] = mul i32 [[VAL4]], [[VAL4]] +; CHECK-NEXT: [[OP_RDX14:%.*]] = mul i32 [[OP_RDX]], [[OP_RDX1]] +; CHECK-NEXT: [[OP_RDX15:%.*]] = mul i32 [[OP_RDX2]], [[OP_RDX3]] +; CHECK-NEXT: [[OP_RDX16:%.*]] = mul i32 [[OP_RDX4]], [[OP_RDX5]] +; CHECK-NEXT: [[OP_RDX17:%.*]] = mul i32 [[OP_RDX6]], [[OP_RDX7]] +; CHECK-NEXT: [[OP_RDX18:%.*]] = mul i32 [[OP_RDX8]], [[OP_RDX9]] +; CHECK-NEXT: [[OP_RDX19:%.*]] = mul i32 [[OP_RDX10]], [[OP_RDX11]] +; CHECK-NEXT: [[OP_RDX20:%.*]] = mul i32 [[OP_RDX12]], [[OP_RDX13]] +; CHECK-NEXT: [[OP_RDX21:%.*]] = mul i32 [[OP_RDX14]], [[OP_RDX15]] +; CHECK-NEXT: [[OP_RDX22:%.*]] = mul i32 [[OP_RDX16]], [[OP_RDX17]] +; CHECK-NEXT: [[OP_RDX23:%.*]] = mul i32 [[OP_RDX18]], [[OP_RDX19]] +; CHECK-NEXT: [[OP_RDX24:%.*]] = mul i32 [[OP_RDX20]], [[VAL]] +; CHECK-NEXT: [[OP_RDX25:%.*]] = mul i32 [[OP_RDX21]], [[OP_RDX22]] +; CHECK-NEXT: [[OP_RDX26:%.*]] = mul i32 [[OP_RDX23]], [[OP_RDX24]] +; CHECK-NEXT: [[OP_RDX27:%.*]] = mul i32 [[OP_RDX25]], [[OP_RDX26]] +; CHECK-NEXT: [[VAL64:%.*]] = add i32 undef, [[OP_RDX27]] ; CHECK-NEXT: [[VAL65:%.*]] = sext i32 [[VAL64]] to i64 ; CHECK-NEXT: ret i64 [[VAL65]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll index 32ad5fd80cf57..4795ac6559203 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll @@ -63,11 +63,8 @@ define i32 @foo(ptr noalias nocapture %A, ptr noalias nocapture %B, float %T) { ; PR41892 define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v2f32_store( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[X1]], i32 1 -; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; %x0 = extractelement <4 x float> %f, i64 0 @@ -95,13 +92,10 @@ define void @test_v4f32_v2f32_splat_store(<4 x float> %f, ptr %p){ define void @test_v4f32_v3f32_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v3f32_store( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[F]], i64 2 +; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[F:%.*]], i64 2 ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[X1]], i32 1 -; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[P]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[P]], align 4 ; CHECK-NEXT: store float [[X2]], ptr [[P2]], align 4 ; CHECK-NEXT: ret void ; @@ -156,10 +150,8 @@ define void @test_v4f32_v4f32_store(<4 x float> %f, ptr %p){ define void @test_v4f32_v4f32_splat_store(<4 x float> %f, ptr %p){ ; CHECK-LABEL: @test_v4f32_v4f32_splat_store( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: store <4 x float> [[SHUFFLE]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; %x0 = extractelement <4 x float> %f, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll index 3850b42327857..8284fc404ca18 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll @@ -71,12 +71,19 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load( -; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> -; AVX512VL-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0:![0-9]+]] -; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], -; AVX512VL-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 +; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 +; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 +; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1 +; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2 +; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3 +; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], +; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -174,11 +181,11 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado ; ; AVX512VL-LABEL: @gather_load_2( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> -; AVX512VL-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], -; AVX512VL-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x ptr> [[TMP4]], <4 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], +; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -304,20 +311,20 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; ; AVX512F-LABEL: @gather_load_3( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512F-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> -; AVX512F-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP5]], -; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], +; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_3( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> -; AVX512VL-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP5]], -; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], +; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load i32, ptr %1, align 4, !tbaa !2 @@ -461,20 +468,20 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; ; AVX512F-LABEL: @gather_load_4( ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 -; AVX512F-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> -; AVX512F-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP3]], -; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> +; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], +; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_4( ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> -; AVX512VL-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP3]], -; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> +; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], +; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %t5 = getelementptr inbounds i32, ptr %t0, i64 1 @@ -551,14 +558,15 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP18]], i64 3 ; SSE-NEXT: [[TMP27:%.*]] = fdiv <4 x float> [[TMP22]], [[TMP26]] ; SSE-NEXT: store <4 x float> [[TMP27]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; SSE-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; SSE-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 -; SSE-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 +; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 +; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 +; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; SSE-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 +; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 +; SSE-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 +; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 +; SSE-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 +; SSE-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4, !tbaa [[TBAA0]] @@ -566,17 +574,16 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP43:%.*]] = load float, ptr [[TMP35]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP44:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0 -; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP39]], i64 1 -; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP41]], i64 2 -; SSE-NEXT: [[TMP48:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP43]], i64 3 -; SSE-NEXT: [[TMP49:%.*]] = insertelement <4 x float> poison, float [[TMP38]], i64 0 -; SSE-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP40]], i64 1 -; SSE-NEXT: [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP42]], i64 2 -; SSE-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP44]], i64 3 -; SSE-NEXT: [[TMP53:%.*]] = fdiv <4 x float> [[TMP48]], [[TMP52]] -; SSE-NEXT: store <4 x float> [[TMP53]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP44:%.*]] = insertelement <4 x float> poison, float [[TMP36]], i64 0 +; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP38]], i64 1 +; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP40]], i64 2 +; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP42]], i64 3 +; SSE-NEXT: [[TMP48:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0 +; SSE-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[TMP48]], float [[TMP39]], i64 1 +; SSE-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP41]], i64 2 +; SSE-NEXT: [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP43]], i64 3 +; SSE-NEXT: [[TMP52:%.*]] = fdiv <4 x float> [[TMP47]], [[TMP51]] +; SSE-NEXT: store <4 x float> [[TMP52]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: ret void ; ; AVX-LABEL: @gather_load_div( @@ -685,24 +692,24 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; ; AVX512F-LABEL: @gather_load_div( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512F-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]] -; AVX512F-NEXT: store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] +; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_div( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]] -; AVX512VL-NEXT: store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] +; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load float, ptr %1, align 4, !tbaa !2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll index a1d08c987541b..7597cf14f7651 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll @@ -71,12 +71,19 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load( -; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> -; AVX512VL-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0:![0-9]+]] -; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], -; AVX512VL-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1 +; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11 +; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i64 0 +; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1 +; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2 +; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3 +; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], +; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -174,11 +181,11 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado ; ; AVX512VL-LABEL: @gather_load_2( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> -; AVX512VL-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], -; AVX512VL-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x ptr> [[TMP4]], <4 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], +; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 @@ -304,20 +311,20 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; ; AVX512F-LABEL: @gather_load_3( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512F-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> -; AVX512F-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP5]], -; AVX512F-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], +; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_3( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> -; AVX512VL-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP5]], -; AVX512VL-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], +; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load i32, ptr %1, align 4, !tbaa !2 @@ -461,20 +468,20 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; ; AVX512F-LABEL: @gather_load_4( ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 -; AVX512F-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> -; AVX512F-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP3]], -; AVX512F-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> +; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], +; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_4( ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> -; AVX512VL-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP3]], -; AVX512VL-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> +; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], +; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %t5 = getelementptr inbounds i32, ptr %t0, i64 1 @@ -551,14 +558,15 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP18]], i64 3 ; SSE-NEXT: [[TMP27:%.*]] = fdiv <4 x float> [[TMP22]], [[TMP26]] ; SSE-NEXT: store <4 x float> [[TMP27]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 -; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 -; SSE-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 -; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 -; SSE-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 -; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 -; SSE-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 -; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 +; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17 +; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33 +; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; SSE-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30 +; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5 +; SSE-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27 +; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20 +; SSE-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23 +; SSE-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4, !tbaa [[TBAA0]] @@ -566,17 +574,16 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; SSE-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: [[TMP43:%.*]] = load float, ptr [[TMP35]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP44:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]] -; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0 -; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP39]], i64 1 -; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP41]], i64 2 -; SSE-NEXT: [[TMP48:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP43]], i64 3 -; SSE-NEXT: [[TMP49:%.*]] = insertelement <4 x float> poison, float [[TMP38]], i64 0 -; SSE-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP40]], i64 1 -; SSE-NEXT: [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP42]], i64 2 -; SSE-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP44]], i64 3 -; SSE-NEXT: [[TMP53:%.*]] = fdiv <4 x float> [[TMP48]], [[TMP52]] -; SSE-NEXT: store <4 x float> [[TMP53]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] +; SSE-NEXT: [[TMP44:%.*]] = insertelement <4 x float> poison, float [[TMP36]], i64 0 +; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP38]], i64 1 +; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP40]], i64 2 +; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP42]], i64 3 +; SSE-NEXT: [[TMP48:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0 +; SSE-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[TMP48]], float [[TMP39]], i64 1 +; SSE-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP41]], i64 2 +; SSE-NEXT: [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP43]], i64 3 +; SSE-NEXT: [[TMP52:%.*]] = fdiv <4 x float> [[TMP47]], [[TMP51]] +; SSE-NEXT: store <4 x float> [[TMP52]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] ; SSE-NEXT: ret void ; ; AVX-LABEL: @gather_load_div( @@ -685,24 +692,24 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; ; AVX512F-LABEL: @gather_load_div( ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512F-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512F-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> -; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]] -; AVX512F-NEXT: store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] +; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_div( ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]] -; AVX512VL-NEXT: store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> +; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] +; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = load float, ptr %1, align 4, !tbaa !2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll index ccc559d30b6b3..d91470c50992e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll @@ -9,7 +9,7 @@ define dso_local <4 x float> @foo(<4 x i32> %0) { ; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i32> [[TMP6]] to <2 x float> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP8]], <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 94dfb4de871b3..6e0aafeacd309 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -95,19 +95,16 @@ define i1 @logical_or_fcmp(<4 x float> %x) { define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) { ; SSE-LABEL: @logical_and_icmp_diff_preds( ; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 ; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 ; SSE-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0 ; SSE-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X3]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[X1]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], zeroinitializer -; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP4]], i1 false +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer +; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 +; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP3]], i1 false ; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 -; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false +; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 +; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP4]], i1 false ; SSE-NEXT: ret i1 [[S3]] ; ; AVX-LABEL: @logical_and_icmp_diff_preds( @@ -187,16 +184,13 @@ define i1 @mixed_logical_icmp(<4 x i32> %x) { define i1 @logical_and_icmp_subvec(<4 x i32> %x) { ; SSE-LABEL: @logical_and_icmp_subvec( -; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X1]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[X0]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], zeroinitializer +; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer ; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 0 -; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; SSE-NEXT: [[S1:%.*]] = select i1 [[TMP5]], i1 [[TMP4]], i1 false +; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 +; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 +; SSE-NEXT: [[S1:%.*]] = select i1 [[TMP4]], i1 [[TMP3]], i1 false ; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false ; SSE-NEXT: ret i1 [[S2]] ; @@ -228,13 +222,13 @@ define i1 @logical_and_icmp_subvec(<4 x i32> %x) { define i1 @logical_and_icmp_clamp(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp( -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]]) -; CHECK-NEXT: ret i1 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) +; CHECK-NEXT: ret i1 [[TMP6]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -260,15 +254,15 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) { define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp( -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6 -; CHECK-NEXT: call void @use1(i1 [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) -; CHECK-NEXT: ret i1 [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6 +; CHECK-NEXT: call void @use1(i1 [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]]) +; CHECK-NEXT: ret i1 [[TMP7]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -335,27 +329,20 @@ define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) { define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { ; CHECK-LABEL: @logical_and_icmp_clamp_v8i32( -; CHECK-NEXT: [[X0:%.*]] = extractelement <8 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <8 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <8 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <8 x i32> [[X]], i32 3 ; CHECK-NEXT: [[Y0:%.*]] = extractelement <8 x i32> [[Y:%.*]], i32 0 ; CHECK-NEXT: [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1 ; CHECK-NEXT: [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2 ; CHECK-NEXT: [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[X1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[X2]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[X3]], i32 3 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> , i32 [[Y0]], i32 4 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y1]], i32 5 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[Y2]], i32 6 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[Y3]], i32 7 -; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = freeze <8 x i1> [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP10]]) -; CHECK-NEXT: ret i1 [[TMP11]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> , i32 [[Y0]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[Y1]], i32 5 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[Y2]], i32 6 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y3]], i32 7 +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP2]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = freeze <8 x i1> [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP8]]) +; CHECK-NEXT: ret i1 [[TMP9]] ; %x0 = extractelement <8 x i32> %x, i32 0 %x1 = extractelement <8 x i32> %x, i32 1 @@ -386,21 +373,18 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) { ; SSE-LABEL: @logical_and_icmp_clamp_partial( ; SSE-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 -; SSE-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 0 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 -; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[TMP5]], +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], ; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42 -; SSE-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[X]], -; SSE-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]] -; SSE-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]]) -; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 -; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP9]], i1 [[TMP10]], i1 false -; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1 -; SSE-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP11]], i1 [[C2]], i1 false -; SSE-NEXT: [[TMP12:%.*]] = freeze i1 [[OP_RDX]] -; SSE-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP12]], i1 [[OP_RDX1]], i1 false +; SSE-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], +; SSE-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] +; SSE-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) +; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false +; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; SSE-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP8]], i1 [[C2]], i1 false +; SSE-NEXT: [[TMP9:%.*]] = freeze i1 [[OP_RDX]] +; SSE-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP9]], i1 [[OP_RDX1]], i1 false ; SSE-NEXT: ret i1 [[OP_RDX2]] ; ; AVX-LABEL: @logical_and_icmp_clamp_partial( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll index 94aa4449c6797..3daebe50d724f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll @@ -11,15 +11,9 @@ define i64 @test() { ; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 0, [[BB2:%.*]] ], [ 0, [[BB1:%.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 0, [[BB1]] ] ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP44:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP4]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP44]], i32 [[TMP4]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP4]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP7]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[TMP4]], [[TMP4]] ; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[OP_RDX]], [[OP_RDX1]] ; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[OP_RDX2]], [[TMP]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll index e8ca9aeb8147c..d11eb609b12d0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -8,9 +8,9 @@ define void @fextr(ptr %ptr) { ; CHECK-NEXT: [[LD:%.*]] = load <8 x i16>, ptr undef, align 16 ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] -; CHECK-NEXT: store <8 x i16> [[TMP0]], ptr [[PTR:%.*]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[LD]], [[TMP0]] +; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[PTR:%.*]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll index 24334c43da684..5cf0dc6a40a81 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll @@ -3,23 +3,19 @@ define void @test(ptr %p, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ; CHECK-LABEL: @test( -; CHECK-NEXT: [[S1:%.*]] = sub i32 [[A:%.*]], 1 -; CHECK-NEXT: [[S2:%.*]] = sub i32 [[B:%.*]], 1 -; CHECK-NEXT: [[M1:%.*]] = mul i32 [[S1]], [[E:%.*]] -; CHECK-NEXT: [[M2:%.*]] = mul i32 [[S2]], [[F:%.*]] -; CHECK-NEXT: [[M3:%.*]] = mul i32 [[S1]], [[C:%.*]] -; CHECK-NEXT: [[M4:%.*]] = mul i32 [[S2]], [[D:%.*]] -; CHECK-NEXT: [[A1:%.*]] = add i32 [[A]], [[M1]] -; CHECK-NEXT: [[A2:%.*]] = add i32 [[B]], [[M2]] -; CHECK-NEXT: [[A3:%.*]] = add i32 [[C]], [[M3]] -; CHECK-NEXT: [[A4:%.*]] = add i32 [[D]], [[M4]] -; CHECK-NEXT: store i32 [[A1]], ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 1 -; CHECK-NEXT: store i32 [[A2]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 2 -; CHECK-NEXT: store i32 [[A3]], ptr [[GEP1]], align 4 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 3 -; CHECK-NEXT: store i32 [[A4]], ptr [[GEP2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[E:%.*]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[F:%.*]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[C:%.*]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[D:%.*]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP11]], [[TMP9]] +; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; %s1 = sub i32 %a, 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll index ec555f7e7aacc..8dbb79cd81617 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-13 | FileCheck %s +; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-14 | FileCheck %s define void @test(i1 %c, ptr %arg) { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll index c34a91f6be7ce..3b4938f220fdd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll @@ -13,16 +13,16 @@ define void @test(ptr %a, ptr %b) { ; CHECK: for.body: ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr null, align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[SHUFFLE]], [[TMP6]] -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x float> [[SHUFFLE]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[SHUFFLE2]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[SHUFFLE1]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[TMP9]], zeroinitializer -; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[RESULT]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], zeroinitializer +; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[RESULT]], align 4 ; CHECK-NEXT: br label [[FOR_BODY]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index 666a2ec85c0fa..aa3c2be7dc9c2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -12,24 +12,22 @@ define void @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP10:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: ; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double> ; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 undef to double -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> , double [[CONV2]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]] +; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[TMP3]], [[CONV2]] +; CHECK-NEXT: [[SUB1:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> , double [[SUB1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[ADD1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x double> [[TMP6]], [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = fptrunc <4 x double> [[TMP6]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP2]], <4 x float> [[TMP8]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: [[TMP10]] = phi <4 x float> [ [[TMP9]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] ; CHECK-NEXT: br label [[BB2]] ; entry: