diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8a07f9c36cd44..df5d81a5466df 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5448,40 +5448,136 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { if (!TE.ReorderIndices.empty()) return TE.ReorderIndices; - auto PHICompare = [&](unsigned I1, unsigned I2) { - Value *V1 = TE.Scalars[I1]; - Value *V2 = TE.Scalars[I2]; - if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0)) - return false; - if (V1->getNumUses() < V2->getNumUses()) - return true; - if (V1->getNumUses() > V2->getNumUses()) - return false; - auto *FirstUserOfPhi1 = cast(*V1->user_begin()); - auto *FirstUserOfPhi2 = cast(*V2->user_begin()); - if (auto *IE1 = dyn_cast(FirstUserOfPhi1)) - if (auto *IE2 = dyn_cast(FirstUserOfPhi2)) { - if (!areTwoInsertFromSameBuildVector( - IE1, IE2, - [](InsertElementInst *II) { return II->getOperand(0); })) - return I1 < I2; - return getElementIndex(IE1) < getElementIndex(IE2); - } - if (auto *EE1 = dyn_cast(FirstUserOfPhi1)) - if (auto *EE2 = dyn_cast(FirstUserOfPhi2)) { - if (EE1->getOperand(0) != EE2->getOperand(0)) - return I1 < I2; - return getElementIndex(EE1) < getElementIndex(EE2); - } - return I1 < I2; - }; - SmallDenseMap PhiToId; + DenseMap PhiToId; SmallVector Phis(TE.Scalars.size()); std::iota(Phis.begin(), Phis.end(), 0); + + BitVector Seen(Phis.size()); + SmallVector> Groups; + Groups.resize(Phis.size(), {}); + + for (auto const Phidx : Phis) { + // We've already found a group for this Phidx + if (Seen.test(Phidx)) + continue; + + Groups[Phidx].push_back(Phidx); + + auto UserIterPhi1 = TE.Scalars[Phidx]->user_begin(); + if (UserIterPhi1.atEnd()) + continue; + + auto *FirstUserOfPhi1 = cast(*UserIterPhi1); + + unsigned Count = 0; + if (auto *IE1 = dyn_cast(FirstUserOfPhi1)) { + auto Width = IE1->getType()->getElementCount(); + assert(!Width.isScalable()); + Count = Width.getFixedValue() - 1; + } else if (auto *EE1 = dyn_cast(FirstUserOfPhi1)) { + // Count is unused in the case of extract element instructions. + Count = -1; + } + + for (auto const PhidxB : ArrayRef(Phis).drop_front(Phidx + 1)) { + + // At this point we know that we have found all the elements that fit in + // this group so we'll stop. + if (Count == 0) + break; + + // B is already in a group so we don't want to look at it again. + if (Seen.test(PhidxB)) + continue; + + auto UserIterPhi2 = TE.Scalars[PhidxB]->user_begin(); + if (UserIterPhi2.atEnd()) { + Seen.set(PhidxB); + continue; + } + + auto *FirstUserOfPhi2 = cast(*UserIterPhi2); + + if (auto *IE2 = dyn_cast(FirstUserOfPhi2)) { + if (auto *IE1 = dyn_cast(FirstUserOfPhi1)) { + if (areTwoInsertFromSameBuildVector( + IE1, IE2, + [](InsertElementInst *II) { return II->getOperand(0); })) { + Groups[Phidx].push_back(PhidxB); + Seen.set(PhidxB); + --Count; + continue; + } + } + } else if (auto *EE1 = dyn_cast(FirstUserOfPhi1)) { + if (auto *EE2 = dyn_cast(FirstUserOfPhi2)) { + if (EE1->getVectorOperand() == EE2->getVectorOperand()) { + Groups[Phidx].push_back(PhidxB); + Seen.set(PhidxB); + continue; + } + } + } + } + + std::sort( + Groups[Phidx].begin(), Groups[Phidx].end(), + [TE](unsigned I1, unsigned I2) { + auto U1 = TE.Scalars[I1]->user_begin(); + if (U1.atEnd()) + return false; + auto U2 = TE.Scalars[I2]->user_begin(); + if (U2.atEnd()) + return true; + + if (auto *FirstUserOfPhi1 = dyn_cast(*(U1))) { + if (auto *FirstUserOfPhi2 = dyn_cast(*(U2))) { + return getElementIndex(FirstUserOfPhi1) < + getElementIndex(FirstUserOfPhi2); + } + } + + if (auto *FirstUserOfPhi1 = dyn_cast(*(U1))) { + if (auto *FirstUserOfPhi2 = dyn_cast(*(U2))) { + return FirstUserOfPhi1->getIndexOperand() < + FirstUserOfPhi2->getIndexOperand(); + } + } + + llvm_unreachable( + "Found something other than InsertElement or ExtractElement"); + }); + } + + // Sort the groups on the basis of their ordering in the block. + std::sort(Groups.begin(), Groups.end(), + [TE](SmallVector I1, SmallVector I2) { + if (I1.empty()) + return false; + if (I2.empty()) + return true; + + auto PhidxA = I1.front(); + auto *InstA = TE.Scalars[PhidxA]; + auto *Phi1 = cast(InstA); + + auto PhidxB = I2.front(); + auto *InstB = TE.Scalars[PhidxB]; + auto *Phi2 = cast(InstB); + + return Phi1->comesBefore(Phi2); + }); + OrdersType ResOrder(TE.Scalars.size()); for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) PhiToId[Id] = Id; - stable_sort(Phis, PHICompare); + + Phis.clear(); + for (auto const &group : Groups) { + for (auto const element : group) + Phis.push_back(element); + } + for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id) ResOrder[Id] = PhiToId[Phis[Id]]; if (isIdentityOrder(ResOrder)) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll index e4b6c06b79fc1..4b99e2a8ff795 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll @@ -8,7 +8,7 @@ ; YAML-NEXT: Function: foo ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '2' +; YAML-NEXT: - Cost: '0' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '2' @@ -28,7 +28,7 @@ ; YAML-NEXT: Function: foo ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '2' +; YAML-NEXT: - Cost: '3' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '9' @@ -45,36 +45,37 @@ define void @foo() personality ptr @bar { ; CHECK: bb3: ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i64> [ [[TMP4:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = invoke i64 poison(ptr addrspace(1) nonnull poison, i64 0, i64 0, i64 poison) [ "deopt"() ] -; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]] +; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]] ; CHECK: bb4: ; CHECK-NEXT: br i1 poison, label [[BB11:%.*]], label [[BB5:%.*]] ; CHECK: bb5: ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP4]] = phi <2 x i64> [ , [[BB8:%.*]] ] +; CHECK-NEXT: [[TMP4]] = phi <2 x i64> [ , [[BB8:%.*]] ] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb7: ; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i64 [ poison, [[BB8]] ], [ poison, [[BB5]] ] ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[LOCAL_5_84111]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = invoke i64 poison(ptr addrspace(1) nonnull poison, i64 poison, i64 poison, i64 poison) [ "deopt"() ] -; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] +; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] ; CHECK: bb8: ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] -; CHECK-NEXT: [[TMP7]] = phi <2 x i64> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ] +; CHECK-NEXT: [[TMP7]] = phi <2 x i64> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ] ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: -; CHECK-NEXT: [[TMP8]] = phi <2 x i64> [ [[TMP2]], [[BB3]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i64> [ [[TMP2]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i64 } -; CHECK-NEXT: cleanup +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[TMP9]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP9]] = phi <2 x i64> [ [[TMP5]], [[BB7]] ] +; CHECK-NEXT: [[TMP10]] = phi <2 x i64> [ [[TMP5]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i64 } -; CHECK-NEXT: cleanup +; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] ; bb1: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll index dbc4f3d59d4f9..6f63b2a8f9aac 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll @@ -13,6 +13,7 @@ define void @test() { ; CHECK-NEXT: br label %[[BB64]] ; CHECK: [[BB64]]: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I66:%.*]] = load float, ptr poison, align 16 ; CHECK-NEXT: [[I67:%.*]] = load float, ptr poison, align 4 ; CHECK-NEXT: [[I68:%.*]] = load float, ptr poison, align 8 @@ -25,48 +26,45 @@ define void @test() { ; CHECK-NEXT: [[I75:%.*]] = load float, ptr poison, align 16 ; CHECK-NEXT: [[I76:%.*]] = load float, ptr poison, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x float> poison, float [[I76]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x float> [[TMP1]], float [[I75]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x float> [[TMP2]], float [[I74]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x float> [[TMP3]], float [[I73]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x float> [[TMP4]], float [[I71]], i32 4 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x float> [[TMP5]], float [[I70]], i32 5 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I68]], i32 6 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I66]], i32 7 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I72]], i32 13 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I69]], i32 14 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 15 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x float> [[TMP1]], float [[I75]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x float> [[TMP3]], float [[I74]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x float> [[TMP4]], float [[I73]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x float> [[TMP5]], float [[I72]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I71]], i32 7 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I70]], i32 8 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I69]], i32 10 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I68]], i32 13 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 14 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x float> [[TMP11]], float [[I66]], i32 15 ; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]] ; CHECK: [[BB77]]: -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x float> [[TMP12]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> ; CHECK-NEXT: br label %[[BB78:.*]] ; CHECK: [[BB78]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ] -; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP14]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP32:%.*]], %[[BB78]] ] +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2) +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> [[TMP2]], i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]] ; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison ; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison -; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP32]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]] ; CHECK: [[BB167]]: -; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 15 +; CHECK-NEXT: [[TMP30:%.*]] = phi <16 x float> [ [[TMP12]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP30]], i32 14 ; CHECK-NEXT: store float [[TMP33]], ptr poison, align 1 -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP32]], i32 13 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP30]], i32 6 ; CHECK-NEXT: store float [[TMP34]], ptr poison, align 1 -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 14 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP30]], i32 10 ; CHECK-NEXT: br i1 poison, label %[[BB186:.*]], label %[[BB184:.*]] ; CHECK: [[BB184]]: ; CHECK-NEXT: br label %[[BB185:.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll index 43c42c1ea2bfb..0b948d7882183 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll @@ -13,11 +13,11 @@ define void @test() { ; CHECK: [[BB6]]: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> , <2 x i32> ; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]] ; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB6]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll index de99654d84eb8..749257a9ddc38 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll @@ -25,7 +25,7 @@ define i1 @foo() { ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> zeroinitializer, <4 x float> [[TMP5]], <4 x float> [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fsub <4 x float> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> ; CHECK-NEXT: br label [[TMP11]] ; CHECK: 11: ; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x float> [ [[TMP10]], [[TMP2]] ], [ zeroinitializer, [[TMP0:%.*]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll index 2f69a01bb5e37..e63840ebf8f7f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -13,19 +13,19 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK: bb1: ; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] ; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll index 9b35fcaebadf9..6f3b1f8fa9cd4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -13,19 +13,19 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK: bb1: ; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] ; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll index 1c0fa1d85f9e4..d4d5be89532a1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll @@ -27,23 +27,24 @@ define void @SIM4() { ; CHECK: land.rhs.lr.ph: ; CHECK-NEXT: unreachable ; CHECK: if.end98: +; CHECK-NEXT: [[FROM299:%.*]] = getelementptr inbounds [[STRUCT__EXON_T_12_103_220_363_480_649_740_857_1039_1065_1078_1091_1117_1130_1156_1169_1195_1221_1234_1286_1299_1312_1338_1429_1455_1468_1494_1520_1884_1897_1975_2066_2105_2170_2171:%.*]], ptr undef, i64 0, i32 1 ; CHECK-NEXT: br i1 undef, label [[LAND_LHS_TRUE167]], label [[IF_THEN103:%.*]] ; CHECK: if.then103: ; CHECK-NEXT: [[DOTSUB100:%.*]] = select i1 undef, i32 250, i32 undef ; CHECK-NEXT: [[MUL114:%.*]] = shl nsw i32 [[DOTSUB100]], 2 ; CHECK-NEXT: [[COND125:%.*]] = select i1 undef, i32 undef, i32 [[MUL114]] -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[COND125]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[DOTSUB100]], i32 1 ; CHECK-NEXT: br label [[FOR_COND_I:%.*]] ; CHECK: for.cond.i: -; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ undef, [[LAND_RHS_I874:%.*]] ], [ [[TMP1]], [[IF_THEN103]] ] +; CHECK-NEXT: [[ROW_0_I:%.*]] = phi i32 [ undef, [[LAND_RHS_I874:%.*]] ], [ [[DOTSUB100]], [[IF_THEN103]] ] +; CHECK-NEXT: [[COL_0_I:%.*]] = phi i32 [ undef, [[LAND_RHS_I874]] ], [ [[COND125]], [[IF_THEN103]] ] ; CHECK-NEXT: br i1 undef, label [[LAND_RHS_I874]], label [[FOR_END_I:%.*]] ; CHECK: land.rhs.i874: ; CHECK-NEXT: br i1 undef, label [[FOR_COND_I]], label [[FOR_END_I]] ; CHECK: for.end.i: ; CHECK-NEXT: br i1 undef, label [[IF_THEN_I:%.*]], label [[IF_END_I:%.*]] ; CHECK: if.then.i: -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], undef +; CHECK-NEXT: [[ADD14_I:%.*]] = add nsw i32 [[ROW_0_I]], undef +; CHECK-NEXT: [[ADD15_I:%.*]] = add nsw i32 [[COL_0_I]], undef ; CHECK-NEXT: br label [[EXTEND_BW_EXIT:%.*]] ; CHECK: if.end.i: ; CHECK-NEXT: [[ADD16_I:%.*]] = add i32 [[COND125]], [[DOTSUB100]] @@ -64,11 +65,14 @@ define void @SIM4() { ; CHECK: while.end275.i: ; CHECK-NEXT: br label [[EXTEND_BW_EXIT]] ; CHECK: extend_bw.exit: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ [[TMP3]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ] +; CHECK-NEXT: [[ADD14_I1262:%.*]] = phi i32 [ [[ADD14_I]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ] +; CHECK-NEXT: [[ADD15_I1261:%.*]] = phi i32 [ [[ADD15_I]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ] ; CHECK-NEXT: br i1 false, label [[IF_THEN157:%.*]], label [[LAND_LHS_TRUE167]] ; CHECK: if.then157: -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], -; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr undef, align 4 +; CHECK-NEXT: [[ADD158:%.*]] = add nsw i32 [[ADD14_I1262]], 1 +; CHECK-NEXT: store i32 [[ADD158]], ptr [[FROM299]], align 4 +; CHECK-NEXT: [[ADD160:%.*]] = add nsw i32 [[ADD15_I1261]], 1 +; CHECK-NEXT: store i32 [[ADD160]], ptr undef, align 4 ; CHECK-NEXT: br label [[LAND_LHS_TRUE167]] ; CHECK: land.lhs.true167: ; CHECK-NEXT: unreachable diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll index 7476c77c58320..31f97647365cd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll @@ -13,7 +13,7 @@ define void @test() personality ptr null { ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ poison, %[[BB2]] ] ; CHECK-NEXT: [[LANDINGPAD:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0 ; CHECK-NEXT: call void null(i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]]) ; CHECK-NEXT: ret void ; CHECK: [[BB65]]: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll index fa33621de5ae7..575c4bc726f26 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll @@ -16,11 +16,11 @@ define i64 @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 -; CHECK-NEXT: [[ADD]] = add i64 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[ADD]] = add i64 [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0 ; CHECK-NEXT: [[TMP9]] = or i64 [[PHI5]], 0 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0 -; CHECK-NEXT: [[TMP7]] = insertelement <2 x i64> , i64 [[ADD]], i32 0 +; CHECK-NEXT: [[TMP7]] = insertelement <2 x i64> , i64 [[ADD]], i32 1 ; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]] ; ; FORCED-LABEL: define i64 @foo() { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll index e0d7c12f70c2e..4168cdb935a5c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll @@ -13,7 +13,7 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK: while.body: ; CHECK-NEXT: [[A_020:%.*]] = phi ptr [ [[A_020_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 1 ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 @@ -24,7 +24,7 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]] ; CHECK-NEXT: ] ; CHECK: sw.bb: -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2 @@ -36,7 +36,7 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1 ; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]] ; CHECK: while.body.backedge: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll index 47b42bc8f32a7..d33231439f87f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -21,7 +21,7 @@ define void @foo() personality ptr @bar { ; CHECK: bb5: ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ , [[BB8:%.*]] ] +; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ , [[BB8:%.*]] ] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb7: ; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ] @@ -32,7 +32,7 @@ define void @foo() personality ptr @bar { ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ] ; CHECK-NEXT: [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: @@ -43,9 +43,10 @@ define void @foo() personality ptr @bar { ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup +; CHECK-NEXT: [[TMP10]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: br label [[BB9]] ; bb1: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll index e5d7ad138b4de..0c6cb4e9a21b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll @@ -8,7 +8,7 @@ ; YAML: Function: test ; YAML: Args: ; YAML: - String: 'Stores SLP vectorized with cost ' -; YAML: - Cost: '-6' +; YAML: - Cost: '-5' ; YAML: - String: ' and with tree size ' ; YAML: - TreeSize: '14' ; YAML: ... @@ -34,10 +34,10 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) { ; CHECK-NEXT: [[CALL1:%.*]] = call fast float @foo(float [[B]]) ; CHECK-NEXT: [[CALL2:%.*]] = call fast float @foo(float [[C]]) ; CHECK-NEXT: [[CALL3:%.*]] = call fast float @foo(float [[D]]) -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[CALL0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[CALL1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[CALL2]], i32 2 -; CHECK-NEXT: [[TMP5]] = insertelement <4 x float> [[TMP4]], float [[CALL3]], i32 3 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[CALL3]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[CALL2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[CALL1]], i32 2 +; CHECK-NEXT: [[TMP5]] = insertelement <4 x float> [[TMP4]], float [[CALL0]], i32 3 ; CHECK-NEXT: br i1 poison, label [[USERBLOCK0]], label [[USERBLOCK1]] ; CHECK: UserBlock1: ; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP5]], [[BLKX]] ], [ [[TMP9:%.*]], [[LOOP_INNER]] ] @@ -51,7 +51,8 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) { ; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP7]], [[USERBLOCK1]] ] ; CHECK-NEXT: [[IDX0:%.*]] = add i64 0, poison ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IDX0]] -; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[GEP0]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[GEP0]], align 4 ; CHECK-NEXT: ret void ; Entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-as-operand-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-as-operand-reorder.ll index 51ce970bf06bc..e16ff250a54c4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-as-operand-reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-as-operand-reorder.ll @@ -9,7 +9,7 @@ define void @test() { ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP3:%.*]], %[[BB3:.*]] ] ; CHECK-NEXT: br i1 false, label %[[BB6:.*]], label %[[BB3]] ; CHECK: [[BB3]]: -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> , <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[TMP3]] = add <2 x i32> zeroinitializer, [[TMP1]] ; CHECK-NEXT: br i1 false, label %[[BB6]], label %[[BB1]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll index 5cbf78435233b..dd6c88d2406f0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-replace-extractelement.ll @@ -8,12 +8,12 @@ define void @test() { ; CHECK-NEXT: br label %[[BB1:.*]] ; CHECK: [[BB1]]: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB1]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> zeroinitializer) ; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TRUNC]] -; CHECK-NEXT: [[TMP4]] = insertelement <2 x i32> , i32 [[OP_RDX1]], i32 1 +; CHECK-NEXT: [[TMP4]] = insertelement <2 x i32> , i32 [[OP_RDX1]], i32 0 ; CHECK-NEXT: br label %[[BB1]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll index ce6a477f30a08..95bfa91e1d5f3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll @@ -6,15 +6,15 @@ define i32 @main(i32 %0) { ; CHECK-NEXT: for.cond.preheader: ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]] ; CHECK: for.inc.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 0 ; CHECK-NEXT: br i1 false, label [[FOR_END]], label [[L1_PREHEADER:%.*]] ; CHECK: for.end: ; CHECK-NEXT: [[DOTPR:%.*]] = phi i32 [ 0, [[FOR_INC_PREHEADER]] ], [ 0, [[FOR_COND_PREHEADER:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[DOTPR]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[L1_PREHEADER]] ; CHECK: L1.preheader: -; CHECK-NEXT: [[TMP3:%.*]] = phi <8 x i32> [ [[SHUFFLE]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <8 x i32> [ [[TMP3]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ] ; CHECK-NEXT: ret i32 0 ; for.cond.preheader: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll index eb5e218f057ce..c44a7f9e402dc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll @@ -31,7 +31,7 @@ define void @test() { ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef ; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll index f870cb44f4e5f..86817f0472312 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -31,7 +31,7 @@ define void @test() { ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef ; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll index dca34b681032c..e9816f53cf208 100644 --- a/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll @@ -1,32 +1,58 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s %} -; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu | FileCheck %s %} +; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck --check-prefix=X86 %s %} +; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu | FileCheck --check-prefix=AARCH %s %} define void @test() { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BODY:%.*]] -; CHECK: body: -; CHECK-NEXT: [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[BODY]] ] -; CHECK-NEXT: [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00 -; CHECK-NEXT: [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00 -; CHECK-NEXT: [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]] -; CHECK-NEXT: [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00 -; CHECK-NEXT: br i1 false, label [[BODY]], label [[EXIT:%.*]] -; CHECK: exit: -; CHECK-NEXT: br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]] -; CHECK: if.then135.i: -; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt <2 x double> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> , <2 x i1> [[TMP1]], <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], zeroinitializer -; CHECK-NEXT: br label [[IF_END209_I]] -; CHECK: if.end209.i: -; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x double> [ [[TMP6]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ] -; CHECK-NEXT: ret void +; X86-LABEL: @test( +; X86-NEXT: entry: +; X86-NEXT: br label [[BODY:%.*]] +; X86: body: +; X86-NEXT: [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ] +; X86-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[BODY]] ] +; X86-NEXT: [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00 +; X86-NEXT: [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00 +; X86-NEXT: [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]] +; X86-NEXT: [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00 +; X86-NEXT: br i1 false, label [[BODY]], label [[EXIT:%.*]] +; X86: exit: +; X86-NEXT: br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]] +; X86: if.then135.i: +; X86-NEXT: [[TMP1:%.*]] = fcmp fast olt <2 x double> [[TMP0]], zeroinitializer +; X86-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> , <2 x i1> [[TMP1]], <2 x i32> +; X86-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> zeroinitializer +; X86-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP3]] +; X86-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP4]], zeroinitializer +; X86-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], zeroinitializer +; X86-NEXT: br label [[IF_END209_I]] +; X86: if.end209.i: +; X86-NEXT: [[TMP7:%.*]] = phi <2 x double> [ [[TMP6]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ] +; X86-NEXT: ret void +; +; AARCH-LABEL: @test( +; AARCH-NEXT: entry: +; AARCH-NEXT: br label [[BODY:%.*]] +; AARCH: body: +; AARCH-NEXT: [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ] +; AARCH-NEXT: [[PHI2:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ 0.000000e+00, [[BODY]] ] +; AARCH-NEXT: [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00 +; AARCH-NEXT: [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00 +; AARCH-NEXT: [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]] +; AARCH-NEXT: [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00 +; AARCH-NEXT: br i1 false, label [[BODY]], label [[EXIT:%.*]] +; AARCH: exit: +; AARCH-NEXT: br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]] +; AARCH: if.then135.i: +; AARCH-NEXT: [[CMP145_I:%.*]] = fcmp fast olt double [[PHI1]], 0.000000e+00 +; AARCH-NEXT: [[CMP152_I:%.*]] = fcmp fast olt double [[PHI2]], 0.000000e+00 +; AARCH-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> , i1 [[CMP152_I]], i32 0 +; AARCH-NEXT: [[TMP1:%.*]] = select <2 x i1> [[TMP0]], <2 x double> zeroinitializer, <2 x double> zeroinitializer +; AARCH-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP1]] +; AARCH-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], zeroinitializer +; AARCH-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], zeroinitializer +; AARCH-NEXT: br label [[IF_END209_I]] +; AARCH: if.end209.i: +; AARCH-NEXT: [[TMP5:%.*]] = phi <2 x double> [ [[TMP4]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ] +; AARCH-NEXT: ret void ; entry: br label %body diff --git a/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll index b9802a0adb8aa..c81d1f3db94b1 100644 --- a/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll +++ b/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll @@ -159,13 +159,13 @@ define i32 @phi1Undef2PoisonInputs(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %a ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[ARG1:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> [[TMP0]], i8 [[ARG0:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[ARG0:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> [[TMP0]], i8 [[ARG1:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[ARG2:%.*]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: ret i32 [[TMP6]] @@ -196,13 +196,13 @@ define i32 @phi1Undef1PoisonGapInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[ARG1:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> [[TMP0]], i8 [[ARG3:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[ARG0:%.*]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG2:%.*]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[ARG0:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> [[TMP0]], i8 [[ARG1:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[ARG2:%.*]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: ret i32 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll b/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll index f6bed797b9ba9..45be77cc21f14 100644 --- a/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll +++ b/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll @@ -6,8 +6,8 @@ define void @foo() { ; CHECK-LABEL: define void @foo() { ; CHECK-NEXT: bci_0: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) null, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> , i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> , i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: br label [[BCI_252:%.*]] ; CHECK: bci_252: ; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[BCI_0:%.*]] ], [ [[TMP16:%.*]], [[BCI_252_1:%.*]] ]