diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d8ca22f3f91a4..cc80b22ba26ab 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14847,14 +14847,20 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { return true; if (Opcodes1.size() > Opcodes2.size()) return false; - std::optional ConstOrder; for (int I = 0, E = Opcodes1.size(); I < E; ++I) { // Undefs are compatible with any other value. if (isa(Opcodes1[I]) || isa(Opcodes2[I])) { - if (!ConstOrder) - ConstOrder = - !isa(Opcodes1[I]) && isa(Opcodes2[I]); - continue; + if (isa(Opcodes1[I])) + return true; + if (isa(Opcodes2[I])) + return false; + if (isa(Opcodes1[I]) && !isa(Opcodes1[I])) + return true; + if (isa(Opcodes2[I]) && !isa(Opcodes2[I])) + return false; + if (isa(Opcodes1[I]) && isa(Opcodes2[I])) + continue; + return isa(Opcodes2[I]); } if (auto *I1 = dyn_cast(Opcodes1[I])) if (auto *I2 = dyn_cast(Opcodes2[I])) { @@ -14870,21 +14876,26 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (NodeI1 != NodeI2) return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn(); InstructionsState S = getSameOpcode({I1, I2}, *TLI); - if (S.getOpcode()) + if (S.getOpcode() && !S.isAltShuffle()) continue; return I1->getOpcode() < I2->getOpcode(); } - if (isa(Opcodes1[I]) && isa(Opcodes2[I])) { - if (!ConstOrder) - ConstOrder = Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID(); - continue; - } + if (isa(Opcodes1[I]) && isa(Opcodes2[I])) + return Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID(); + if (isa(Opcodes1[I])) + return true; + if (isa(Opcodes2[I])) + return false; + if (isa(Opcodes1[I])) + return true; + if (isa(Opcodes2[I])) + return false; if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID()) return true; if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID()) return false; } - return ConstOrder && *ConstOrder; + return false; }; auto AreCompatiblePHIs = [&PHIToOpcodes, this](Value *V1, Value *V2) { if (V1 == V2) @@ -14932,6 +14943,9 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { Incoming.push_back(P); } + if (Incoming.size() <= 1) + break; + // Find the corresponding non-phi nodes for better matching when trying to // build the tree. for (Value *V : Incoming) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll index 53667922be801..78a821aa6e5e2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll @@ -23,8 +23,8 @@ define i1 @foo() { ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> zeroinitializer, <4 x float> [[TMP5]], <4 x float> [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fsub <4 x float> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = fsub <4 x float> [[TMP7]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> ; CHECK-NEXT: br label [[TMP11]] ; CHECK: 11: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll index e63840ebf8f7f..2f69a01bb5e37 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -13,19 +13,19 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK: bb1: ; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] ; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll index 6f3b1f8fa9cd4..9b35fcaebadf9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -13,19 +13,19 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK: bb1: ; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] ; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll index 403546f67c8a1..b9db7236106a1 100644 --- a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll @@ -137,23 +137,23 @@ define void @phi_float32(half %hval, float %fval) { ; MAX256-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float ; MAX256-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float ; MAX256-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0 -; MAX256-NEXT: [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX256-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 -; MAX256-NEXT: [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX256-NEXT: [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE11]], [[SHUFFLE12]] -; MAX256-NEXT: [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]] -; MAX256-NEXT: [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 -; MAX256-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX256-NEXT: [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE12]] -; MAX256-NEXT: [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]] -; MAX256-NEXT: [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 -; MAX256-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX256-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE5]], [[SHUFFLE12]] +; MAX256-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP2:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 +; MAX256-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP4:%.*]] = fmul <8 x float> [[TMP1]], [[TMP3]] +; MAX256-NEXT: [[TMP5:%.*]] = fadd <8 x float> zeroinitializer, [[TMP4]] +; MAX256-NEXT: [[TMP6:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 +; MAX256-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[TMP7]], [[TMP3]] ; MAX256-NEXT: [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]] -; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 -; MAX256-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX256-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE8]], [[SHUFFLE12]] -; MAX256-NEXT: [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]] +; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 +; MAX256-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP12:%.*]] = fmul <8 x float> [[TMP11]], [[TMP3]] +; MAX256-NEXT: [[TMP13:%.*]] = fadd <8 x float> zeroinitializer, [[TMP12]] +; MAX256-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 +; MAX256-NEXT: [[TMP15:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP15]], [[TMP3]] +; MAX256-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]] ; MAX256-NEXT: switch i32 undef, label [[BB5:%.*]] [ ; MAX256-NEXT: i32 0, label [[BB2:%.*]] ; MAX256-NEXT: i32 1, label [[BB3:%.*]] @@ -166,12 +166,12 @@ define void @phi_float32(half %hval, float %fval) { ; MAX256: bb5: ; MAX256-NEXT: br label [[BB2]] ; MAX256: bb2: -; MAX256-NEXT: [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ] -; MAX256-NEXT: [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ] -; MAX256-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ] -; MAX256-NEXT: [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ] -; MAX256-NEXT: [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 -; MAX256-NEXT: store float [[TMP17]], ptr undef, align 4 +; MAX256-NEXT: [[TMP18:%.*]] = phi <8 x float> [ [[TMP5]], [[BB3]] ], [ [[TMP5]], [[BB4]] ], [ [[TMP5]], [[BB5]] ], [ [[TMP3]], [[BB1]] ] +; MAX256-NEXT: [[TMP19:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP17]], [[BB1]] ] +; MAX256-NEXT: [[TMP20:%.*]] = phi <8 x float> [ [[TMP13]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP13]], [[BB5]] ], [ [[TMP13]], [[BB1]] ] +; MAX256-NEXT: [[TMP21:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP3]], [[BB1]] ] +; MAX256-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP20]], i32 7 +; MAX256-NEXT: store float [[TMP22]], ptr undef, align 4 ; MAX256-NEXT: ret void ; ; MAX1024-LABEL: @phi_float32( @@ -183,23 +183,23 @@ define void @phi_float32(half %hval, float %fval) { ; MAX1024-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float ; MAX1024-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float ; MAX1024-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0 -; MAX1024-NEXT: [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX1024-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 -; MAX1024-NEXT: [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX1024-NEXT: [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE11]], [[SHUFFLE12]] -; MAX1024-NEXT: [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]] -; MAX1024-NEXT: [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 -; MAX1024-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX1024-NEXT: [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE12]] -; MAX1024-NEXT: [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]] -; MAX1024-NEXT: [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 -; MAX1024-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX1024-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE5]], [[SHUFFLE12]] +; MAX1024-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP2:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 +; MAX1024-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP4:%.*]] = fmul <8 x float> [[TMP1]], [[TMP3]] +; MAX1024-NEXT: [[TMP5:%.*]] = fadd <8 x float> zeroinitializer, [[TMP4]] +; MAX1024-NEXT: [[TMP6:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 +; MAX1024-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[TMP7]], [[TMP3]] ; MAX1024-NEXT: [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]] -; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 -; MAX1024-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer -; MAX1024-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE8]], [[SHUFFLE12]] -; MAX1024-NEXT: [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]] +; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 +; MAX1024-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP12:%.*]] = fmul <8 x float> [[TMP11]], [[TMP3]] +; MAX1024-NEXT: [[TMP13:%.*]] = fadd <8 x float> zeroinitializer, [[TMP12]] +; MAX1024-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 +; MAX1024-NEXT: [[TMP15:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP15]], [[TMP3]] +; MAX1024-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]] ; MAX1024-NEXT: switch i32 undef, label [[BB5:%.*]] [ ; MAX1024-NEXT: i32 0, label [[BB2:%.*]] ; MAX1024-NEXT: i32 1, label [[BB3:%.*]] @@ -212,12 +212,12 @@ define void @phi_float32(half %hval, float %fval) { ; MAX1024: bb5: ; MAX1024-NEXT: br label [[BB2]] ; MAX1024: bb2: -; MAX1024-NEXT: [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ] -; MAX1024-NEXT: [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ] -; MAX1024-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ] -; MAX1024-NEXT: [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ] -; MAX1024-NEXT: [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 -; MAX1024-NEXT: store float [[TMP17]], ptr undef, align 4 +; MAX1024-NEXT: [[TMP18:%.*]] = phi <8 x float> [ [[TMP5]], [[BB3]] ], [ [[TMP5]], [[BB4]] ], [ [[TMP5]], [[BB5]] ], [ [[TMP3]], [[BB1]] ] +; MAX1024-NEXT: [[TMP19:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP17]], [[BB1]] ] +; MAX1024-NEXT: [[TMP20:%.*]] = phi <8 x float> [ [[TMP13]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP13]], [[BB5]] ], [ [[TMP13]], [[BB1]] ] +; MAX1024-NEXT: [[TMP21:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP3]], [[BB1]] ] +; MAX1024-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP20]], i32 7 +; MAX1024-NEXT: store float [[TMP22]], ptr undef, align 4 ; MAX1024-NEXT: ret void ; bb: