Skip to content

Commit

Permalink
[SLP]Fix comparator for PHI nodes comparison.
Browse files Browse the repository at this point in the history
Fixed comparator for PHI nodes sorting to meet the criteria for strict
weak ordering.
  • Loading branch information
alexey-bataev committed Aug 14, 2023
1 parent 4d425f8 commit 63c7815
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 70 deletions.
38 changes: 26 additions & 12 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14847,14 +14847,20 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
return true;
if (Opcodes1.size() > Opcodes2.size())
return false;
std::optional<bool> ConstOrder;
for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
// Undefs are compatible with any other value.
if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I])) {
if (!ConstOrder)
ConstOrder =
!isa<UndefValue>(Opcodes1[I]) && isa<UndefValue>(Opcodes2[I]);
continue;
if (isa<Instruction>(Opcodes1[I]))
return true;
if (isa<Instruction>(Opcodes2[I]))
return false;
if (isa<Constant>(Opcodes1[I]) && !isa<UndefValue>(Opcodes1[I]))
return true;
if (isa<Constant>(Opcodes2[I]) && !isa<UndefValue>(Opcodes2[I]))
return false;
if (isa<UndefValue>(Opcodes1[I]) && isa<UndefValue>(Opcodes2[I]))
continue;
return isa<UndefValue>(Opcodes2[I]);
}
if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
Expand All @@ -14870,21 +14876,26 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
if (S.getOpcode())
if (S.getOpcode() && !S.isAltShuffle())
continue;
return I1->getOpcode() < I2->getOpcode();
}
if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I])) {
if (!ConstOrder)
ConstOrder = Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID();
continue;
}
if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
return Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID();
if (isa<Instruction>(Opcodes1[I]))
return true;
if (isa<Instruction>(Opcodes2[I]))
return false;
if (isa<Constant>(Opcodes1[I]))
return true;
if (isa<Constant>(Opcodes2[I]))
return false;
if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
return true;
if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
return false;
}
return ConstOrder && *ConstOrder;
return false;
};
auto AreCompatiblePHIs = [&PHIToOpcodes, this](Value *V1, Value *V2) {
if (V1 == V2)
Expand Down Expand Up @@ -14932,6 +14943,9 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
Incoming.push_back(P);
}

if (Incoming.size() <= 1)
break;

// Find the corresponding non-phi nodes for better matching when trying to
// build the tree.
for (Value *V : Incoming) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ define i1 @foo() {
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> zeroinitializer, <4 x float> [[TMP5]], <4 x float> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fsub <4 x float> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = fsub <4 x float> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: br label [[TMP11]]
; CHECK: 11:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
; CHECK: bb1:
; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[ADD]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[ADD]], i32 0
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
; CHECK: bb1:
; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[ADD]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[ADD]], i32 0
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
Expand Down
88 changes: 44 additions & 44 deletions llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,23 +137,23 @@ define void @phi_float32(half %hval, float %fval) {
; MAX256-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float
; MAX256-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float
; MAX256-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0
; MAX256-NEXT: [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
; MAX256-NEXT: [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE11]], [[SHUFFLE12]]
; MAX256-NEXT: [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]]
; MAX256-NEXT: [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
; MAX256-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE12]]
; MAX256-NEXT: [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]]
; MAX256-NEXT: [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
; MAX256-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE5]], [[SHUFFLE12]]
; MAX256-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP2:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
; MAX256-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP4:%.*]] = fmul <8 x float> [[TMP1]], [[TMP3]]
; MAX256-NEXT: [[TMP5:%.*]] = fadd <8 x float> zeroinitializer, [[TMP4]]
; MAX256-NEXT: [[TMP6:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
; MAX256-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[TMP7]], [[TMP3]]
; MAX256-NEXT: [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]]
; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
; MAX256-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE8]], [[SHUFFLE12]]
; MAX256-NEXT: [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]]
; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
; MAX256-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP12:%.*]] = fmul <8 x float> [[TMP11]], [[TMP3]]
; MAX256-NEXT: [[TMP13:%.*]] = fadd <8 x float> zeroinitializer, [[TMP12]]
; MAX256-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
; MAX256-NEXT: [[TMP15:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX256-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP15]], [[TMP3]]
; MAX256-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]]
; MAX256-NEXT: switch i32 undef, label [[BB5:%.*]] [
; MAX256-NEXT: i32 0, label [[BB2:%.*]]
; MAX256-NEXT: i32 1, label [[BB3:%.*]]
Expand All @@ -166,12 +166,12 @@ define void @phi_float32(half %hval, float %fval) {
; MAX256: bb5:
; MAX256-NEXT: br label [[BB2]]
; MAX256: bb2:
; MAX256-NEXT: [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
; MAX256-NEXT: [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ]
; MAX256-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ]
; MAX256-NEXT: [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
; MAX256-NEXT: [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
; MAX256-NEXT: store float [[TMP17]], ptr undef, align 4
; MAX256-NEXT: [[TMP18:%.*]] = phi <8 x float> [ [[TMP5]], [[BB3]] ], [ [[TMP5]], [[BB4]] ], [ [[TMP5]], [[BB5]] ], [ [[TMP3]], [[BB1]] ]
; MAX256-NEXT: [[TMP19:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP17]], [[BB1]] ]
; MAX256-NEXT: [[TMP20:%.*]] = phi <8 x float> [ [[TMP13]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP13]], [[BB5]] ], [ [[TMP13]], [[BB1]] ]
; MAX256-NEXT: [[TMP21:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP3]], [[BB1]] ]
; MAX256-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP20]], i32 7
; MAX256-NEXT: store float [[TMP22]], ptr undef, align 4
; MAX256-NEXT: ret void
;
; MAX1024-LABEL: @phi_float32(
Expand All @@ -183,23 +183,23 @@ define void @phi_float32(half %hval, float %fval) {
; MAX1024-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float
; MAX1024-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float
; MAX1024-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0
; MAX1024-NEXT: [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
; MAX1024-NEXT: [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE11]], [[SHUFFLE12]]
; MAX1024-NEXT: [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]]
; MAX1024-NEXT: [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
; MAX1024-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE12]]
; MAX1024-NEXT: [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]]
; MAX1024-NEXT: [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
; MAX1024-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE5]], [[SHUFFLE12]]
; MAX1024-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP2:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
; MAX1024-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP4:%.*]] = fmul <8 x float> [[TMP1]], [[TMP3]]
; MAX1024-NEXT: [[TMP5:%.*]] = fadd <8 x float> zeroinitializer, [[TMP4]]
; MAX1024-NEXT: [[TMP6:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
; MAX1024-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[TMP7]], [[TMP3]]
; MAX1024-NEXT: [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]]
; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
; MAX1024-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE8]], [[SHUFFLE12]]
; MAX1024-NEXT: [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]]
; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
; MAX1024-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP12:%.*]] = fmul <8 x float> [[TMP11]], [[TMP3]]
; MAX1024-NEXT: [[TMP13:%.*]] = fadd <8 x float> zeroinitializer, [[TMP12]]
; MAX1024-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
; MAX1024-NEXT: [[TMP15:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> poison, <8 x i32> zeroinitializer
; MAX1024-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP15]], [[TMP3]]
; MAX1024-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]]
; MAX1024-NEXT: switch i32 undef, label [[BB5:%.*]] [
; MAX1024-NEXT: i32 0, label [[BB2:%.*]]
; MAX1024-NEXT: i32 1, label [[BB3:%.*]]
Expand All @@ -212,12 +212,12 @@ define void @phi_float32(half %hval, float %fval) {
; MAX1024: bb5:
; MAX1024-NEXT: br label [[BB2]]
; MAX1024: bb2:
; MAX1024-NEXT: [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
; MAX1024-NEXT: [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ]
; MAX1024-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ]
; MAX1024-NEXT: [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
; MAX1024-NEXT: [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
; MAX1024-NEXT: store float [[TMP17]], ptr undef, align 4
; MAX1024-NEXT: [[TMP18:%.*]] = phi <8 x float> [ [[TMP5]], [[BB3]] ], [ [[TMP5]], [[BB4]] ], [ [[TMP5]], [[BB5]] ], [ [[TMP3]], [[BB1]] ]
; MAX1024-NEXT: [[TMP19:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP17]], [[BB1]] ]
; MAX1024-NEXT: [[TMP20:%.*]] = phi <8 x float> [ [[TMP13]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP13]], [[BB5]] ], [ [[TMP13]], [[BB1]] ]
; MAX1024-NEXT: [[TMP21:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP3]], [[BB1]] ]
; MAX1024-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP20]], i32 7
; MAX1024-NEXT: store float [[TMP22]], ptr undef, align 4
; MAX1024-NEXT: ret void
;
bb:
Expand Down

0 comments on commit 63c7815

Please sign in to comment.