17 changes: 8 additions & 9 deletions llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ define ptr @test() {
; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP5:%.*]], [[BODY]] ], [ [[TMP1]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP6:%.*]], [[BODY]] ], [ [[TMP1]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i16>, ptr null, align 2
; CHECK-NEXT: [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP5]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[SHUFFLE]], <2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP6]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP5]], <2 x double> [[TMP2]])
; CHECK-NEXT: br label [[BODY]]
;
entry:
Expand Down Expand Up @@ -54,8 +54,8 @@ define void @test1(ptr %agg.result, ptr %this) {
; CHECK: return:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_END]] ], [ <float 1.000000e+00, float 0.000000e+00>, [[LOR_LHS_FALSE]] ], [ <float 1.000000e+00, float 0.000000e+00>, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[C_I_I_I:%.*]] = getelementptr inbounds float, ptr [[AGG_RESULT:%.*]], i32 2
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: store <2 x float> [[SHUFFLE]], ptr [[C_I_I_I]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[C_I_I_I]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down Expand Up @@ -103,10 +103,10 @@ define void @test2(ptr %p1, ptr %p2) {
; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> <double 4.000000e+00, double 4.100000e+00>, [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], <double 2.000000e+00, double 2.100000e+00>
; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP10]], <double 3.000000e+00, double 3.100000e+00>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x double> [ [[TMP11]], [[BB1]] ], [ [[TMP16:%.*]], [[BB6:%.*]] ]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP15:%.*]], [[BB6:%.*]] ]
; CHECK-NEXT: [[X0:%.*]] = getelementptr inbounds double, ptr [[P2:%.*]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, ptr [[X0]], align 8
; CHECK-NEXT: br i1 poison, label [[BB3:%.*]], label [[BB6]]
Expand All @@ -117,8 +117,7 @@ define void @test2(ptr %p1, ptr %p2) {
; CHECK: bb5:
; CHECK-NEXT: br label [[BB6]]
; CHECK: bb6:
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x double> [ [[TMP13]], [[BB2]] ], [ [[TMP14]], [[BB4]] ], [ [[TMP14]], [[BB5]] ]
; CHECK-NEXT: [[TMP16]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP15]] = phi <2 x double> [ [[TMP13]], [[BB2]] ], [ [[TMP14]], [[BB4]] ], [ [[TMP14]], [[BB5]] ]
; CHECK-NEXT: br label [[BB2]]
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ define void @test() {
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 0, i64 7, i64 33, i64 1>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = ashr <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; CHECK-NEXT: ret void
;
entry:
Expand Down Expand Up @@ -58,14 +58,14 @@ define void @test1() {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 0, i64 7, i64 33, i64 1>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32>
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; CHECK-NEXT: ret void
;
entry:
Expand Down Expand Up @@ -112,11 +112,11 @@ define void @test_div() {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 0, i64 7, i64 33, i64 1>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP3]], <i32 1, i32 2, i32 1, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = udiv <4 x i32> [[TMP2]], <i32 2, i32 1, i32 2, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; CHECK-NEXT: ret void
;
Expand Down Expand Up @@ -164,11 +164,11 @@ define void @test_rem() {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 33, i64 7, i64 0>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 0, i64 7, i64 33, i64 1>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP3]], <i32 1, i32 2, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = urem <4 x i32> [[TMP2]], <i32 1, i32 1, i32 2, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16
; CHECK-NEXT: ret void
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ define void @"foo"(ptr addrspace(1) %0, ptr addrspace(1) %1) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 8
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1(<4 x ptr addrspace(1)> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison)
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <8 x i32> <i32 0, i32 3, i32 0, i32 3, i32 2, i32 1, i32 2, i32 1>
; CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, ptr addrspace(1) [[TMP7]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr addrspace(1) [[TMP7]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <8 x i32> <i32 0, i32 3, i32 0, i32 3, i32 2, i32 1, i32 2, i32 1>
; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[TMP10]], [[TMP9]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x float> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> poison, <8 x i32> <i32 0, i32 5, i32 2, i32 7, i32 4, i32 1, i32 6, i32 3>
; CHECK-NEXT: store <8 x float> [[TMP13]], ptr addrspace(1) [[TMP6]], align 4
Expand Down
38 changes: 17 additions & 21 deletions llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,26 @@ define void @foo (ptr %A, ptr %B, ptr %Result) {
; CHECK-NEXT: [[TMP0:%.*]] = add i64 256, 0
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP20:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP16:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP15:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX:%.*]], ptr [[A:%.*]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], ptr [[B:%.*]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], ptr [[B]], i64 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = fmul <2 x float> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[TMP8]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP16:%.*]] = fsub <2 x float> [[TMP11]], [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x float> [[TMP11]], [[TMP15]]
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> [[TMP17]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP20]] = fadd <2 x float> [[TMP2]], [[TMP21]]
; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[TMP0]]
; CHECK-NEXT: br i1 [[TMP19]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x float> [[TMP5]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP12:%.*]] = fsub <2 x float> [[TMP8]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x float> [[TMP8]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP15]] = fadd <2 x float> [[TMP2]], [[TMP14]]
; CHECK-NEXT: [[TMP16]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[TMP16]], [[TMP0]]
; CHECK-NEXT: br i1 [[TMP17]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: store <2 x float> [[TMP20]], ptr [[RESULT:%.*]], align 4
; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[RESULT:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/SLPVectorizer/X86/return.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ entry:
define double @return2(ptr nocapture readonly %x) {
; CHECK-LABEL: @return2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i32 2
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[X]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret double [[ADD5]]
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,23 @@ define void @test() {
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr undef, align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> <float 0.000000e+00, float poison>, <2 x i32> <i32 2, i32 0>
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP0]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> <float poison, float 0.000000e+00>, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]])
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP8]], zeroinitializer
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP9]], [[BB2]] ], [ zeroinitializer, [[BB1]] ]
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x float> [[TMP1]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fsub <2 x float> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP1]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x float> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP13]], zeroinitializer
; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[ARRAYIDX21_I]], align 16
; CHECK-NEXT: [[TMP15:%.*]] = fsub <2 x float> [[TMP14]], zeroinitializer
; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[ARRAYIDX21_I]], align 16
; CHECK-NEXT: ret void
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,19 @@ define void @test(i1 %c, ptr %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
; CHECK: if:
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARG:%.*]], align 8
; CHECK-NEXT: [[ARG2_2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 24
; CHECK-NEXT: [[ARG2_2:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 24
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARG]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[ARG2_2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP1]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: br label [[JOIN:%.*]]
; CHECK: else:
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARG]], align 8
; CHECK-NEXT: [[ARG_2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 24
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARG_2]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP7]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARG]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARG_2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP4]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i64> [ [[TMP5]], [[IF]] ], [ [[TMP10]], [[ELSE]] ]
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP6]], [[ELSE]] ]
; CHECK-NEXT: ret void
;
br i1 %c, label %if, label %else
Expand Down
9 changes: 4 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@ define i32 @foo(ptr nocapture readonly %diff) #0 {
; CHECK-NEXT: [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DIFF:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i64 [[TMP0]], 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DIFF]], i64 [[TMP1]]
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP3]]
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[ARRAYIDX6]], align 16
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP5]], [[A_088]]
Expand Down
15 changes: 7 additions & 8 deletions llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ define void @wombat(ptr %ptr, ptr %ptr1) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PTR1:%.*]], i32 3
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -1, i32 -1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP1]], undef
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> undef, <4 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP6]]
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP27]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 -1, i32 -1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[TMP3]], undef
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> undef, <4 x i32> [[TMP2]]
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP5]]
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP27]], align 8
; CHECK-NEXT: ret void
;
bb:
Expand Down
99 changes: 33 additions & 66 deletions llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,84 +11,51 @@ declare double @llvm.sin.f64(double)

define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[A0:%.*]] = load double, ptr @src, align 8
; CHECK-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 1), align 8
; CHECK-NEXT: [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 2), align 8
; CHECK-NEXT: [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 3), align 8
; CHECK-NEXT: [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 4), align 8
; CHECK-NEXT: [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 5), align 8
; CHECK-NEXT: [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 6), align 8
; CHECK-NEXT: [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 7), align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A6]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, ptr @src, align 8
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 2, i32 6>
; CHECK-NEXT: [[TMP3:%.*]] = call fast <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[A3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A7]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = call fast <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A4]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 3, i32 7>
; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 0, i32 4>
; CHECK-NEXT: [[TMP7:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 1, i32 5>
; CHECK-NEXT: [[TMP9:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[A1]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[A5]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]]
; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP12]]
; CHECK-NEXT: [[TMP15:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP14]]
; CHECK-NEXT: store <2 x double> [[TMP15]], ptr @dst, align 8
; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP7]], [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP9]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[TMP10]], [[TMP11]]
; CHECK-NEXT: store <2 x double> [[TMP12]], ptr @dst, align 8
; CHECK-NEXT: ret void
;
; VECLIB-LABEL: @test(
; VECLIB-NEXT: [[A0:%.*]] = load double, ptr @src, align 8
; VECLIB-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 1), align 8
; VECLIB-NEXT: [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 2), align 8
; VECLIB-NEXT: [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 3), align 8
; VECLIB-NEXT: [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 4), align 8
; VECLIB-NEXT: [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 5), align 8
; VECLIB-NEXT: [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 6), align 8
; VECLIB-NEXT: [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 7), align 8
; VECLIB-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0
; VECLIB-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A6]], i32 1
; VECLIB-NEXT: [[TMP1:%.*]] = load <8 x double>, ptr @src, align 8
; VECLIB-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 2, i32 6>
; VECLIB-NEXT: [[TMP3:%.*]] = call fast <2 x double> @__svml_sin2(<2 x double> [[TMP2]])
; VECLIB-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[A3]], i32 0
; VECLIB-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A7]], i32 1
; VECLIB-NEXT: [[TMP6:%.*]] = call fast <2 x double> @__svml_sin2(<2 x double> [[TMP5]])
; VECLIB-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; VECLIB-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A4]], i32 1
; VECLIB-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 3, i32 7>
; VECLIB-NEXT: [[TMP5:%.*]] = call fast <2 x double> @__svml_sin2(<2 x double> [[TMP4]])
; VECLIB-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 0, i32 4>
; VECLIB-NEXT: [[TMP7:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP6]])
; VECLIB-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 1, i32 5>
; VECLIB-NEXT: [[TMP9:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP8]])
; VECLIB-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[A1]], i32 0
; VECLIB-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[A5]], i32 1
; VECLIB-NEXT: [[TMP12:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP11]])
; VECLIB-NEXT: [[TMP13:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]]
; VECLIB-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP12]]
; VECLIB-NEXT: [[TMP15:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP14]]
; VECLIB-NEXT: store <2 x double> [[TMP15]], ptr @dst, align 8
; VECLIB-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP7]], [[TMP5]]
; VECLIB-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP9]]
; VECLIB-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[TMP10]], [[TMP11]]
; VECLIB-NEXT: store <2 x double> [[TMP12]], ptr @dst, align 8
; VECLIB-NEXT: ret void
;
; AMDLIBM-LABEL: @test(
; AMDLIBM-NEXT: [[A0:%.*]] = load double, ptr @src, align 8
; AMDLIBM-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 1), align 8
; AMDLIBM-NEXT: [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 2), align 8
; AMDLIBM-NEXT: [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 3), align 8
; AMDLIBM-NEXT: [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 4), align 8
; AMDLIBM-NEXT: [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 5), align 8
; AMDLIBM-NEXT: [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 6), align 8
; AMDLIBM-NEXT: [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 7), align 8
; AMDLIBM-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0
; AMDLIBM-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A6]], i32 1
; AMDLIBM-NEXT: [[TMP1:%.*]] = load <8 x double>, ptr @src, align 8
; AMDLIBM-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 2, i32 6>
; AMDLIBM-NEXT: [[TMP3:%.*]] = call fast <2 x double> @amd_vrd2_sin(<2 x double> [[TMP2]])
; AMDLIBM-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[A3]], i32 0
; AMDLIBM-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A7]], i32 1
; AMDLIBM-NEXT: [[TMP6:%.*]] = call fast <2 x double> @amd_vrd2_sin(<2 x double> [[TMP5]])
; AMDLIBM-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; AMDLIBM-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A4]], i32 1
; AMDLIBM-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 3, i32 7>
; AMDLIBM-NEXT: [[TMP5:%.*]] = call fast <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4]])
; AMDLIBM-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 0, i32 4>
; AMDLIBM-NEXT: [[TMP7:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP6]])
; AMDLIBM-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 1, i32 5>
; AMDLIBM-NEXT: [[TMP9:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP8]])
; AMDLIBM-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[A1]], i32 0
; AMDLIBM-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[A5]], i32 1
; AMDLIBM-NEXT: [[TMP12:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP11]])
; AMDLIBM-NEXT: [[TMP13:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]]
; AMDLIBM-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP12]]
; AMDLIBM-NEXT: [[TMP15:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP14]]
; AMDLIBM-NEXT: store <2 x double> [[TMP15]], ptr @dst, align 8
; AMDLIBM-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP7]], [[TMP5]]
; AMDLIBM-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP9]]
; AMDLIBM-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[TMP10]], [[TMP11]]
; AMDLIBM-NEXT: store <2 x double> [[TMP12]], ptr @dst, align 8
; AMDLIBM-NEXT: ret void
;
%a0 = load double, ptr @src, align 8
Expand Down
49 changes: 15 additions & 34 deletions llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,14 @@ define dso_local void @_Z4testP1S(ptr %p) local_unnamed_addr {
; CHECK-LABEL: @_Z4testP1S(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[P:%.*]], i64 0, i32 1, i64 0
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 15
; CHECK-NEXT: [[I1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 6
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARRAYIDX13]], align 4
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 4
; CHECK-NEXT: [[I7:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 12
; CHECK-NEXT: [[I9:%.*]] = load i32, ptr [[ARRAYIDX27]], align 4
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 13
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[ARRAYIDX34]], align 4
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 5
; CHECK-NEXT: [[I15:%.*]] = load i32, ptr [[ARRAYIDX48]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[I1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 1, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[I7]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[I9]], i32 4
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 8, i32 9, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[I15]], i32 7
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <8 x i32> [[TMP10]], [[TMP2]]
; CHECK-NEXT: store <8 x i32> [[TMP11]], ptr [[P]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARRAYIDX27]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[ARRAYIDX20]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <8 x i32> <i32 3, i32 7, i32 6, i32 4, i32 0, i32 1, i32 2, i32 5>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <8 x i32> [[TMP3]], [[TMP1]]
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[P]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down Expand Up @@ -106,11 +91,9 @@ define dso_local void @test_unordered_splits(ptr nocapture %p) local_unnamed_add
; CHECK-NEXT: [[G20:%.*]] = getelementptr inbounds [16 x i32], ptr [[P2]], i32 0, i64 12
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[G10]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[G20]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 7, i32 5, i32 6, i32 4>
; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[P:%.*]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 7, i32 5, i32 6, i32 4>
; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[P:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down Expand Up @@ -165,14 +148,12 @@ define dso_local void @test_cost_splits(ptr nocapture %p) local_unnamed_addr {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[G12]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[G20]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[G22]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; CHECK-NEXT: store <8 x i32> [[TMP10]], ptr [[P:%.*]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: store <8 x i32> [[TMP8]], ptr [[P:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down
24 changes: 10 additions & 14 deletions llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,16 @@
define void @test(ptr noalias %p, ptr noalias %addr, ptr noalias %s) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP8]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <8 x i32> [[TMP9]], [[TMP6]]
; CHECK-NEXT: store <8 x i32> [[TMP10]], ptr [[S:%.*]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x ptr> poison, ptr [[P:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x ptr> [[TMP1]], <16 x ptr> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, <16 x ptr> [[TMP2]], <16 x i32> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP4]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[TMP6]], [[TMP7]]
; CHECK-NEXT: store <8 x i32> [[TMP8]], ptr [[S:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-7'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '6'
; YAML-NEXT: - TreeSize: '8'
define void @stores(ptr noalias %in, ptr noalias %inn, ptr noalias %out) {
; CHECK-LABEL: @stores(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1
Expand Down Expand Up @@ -66,7 +66,7 @@ define void @stores(ptr noalias %in, ptr noalias %inn, ptr noalias %out) {
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '-9'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '6'
; YAML-NEXT: - TreeSize: '8'
define <4 x i64> @insertelems(ptr noalias %in, ptr noalias %inn) {
; CHECK-LABEL: @insertelems(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[IN:%.*]], align 1
Expand Down
81 changes: 30 additions & 51 deletions llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,13 @@
define void @test_supernode_add(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
; ENABLED-LABEL: @test_supernode_add(
; ENABLED-NEXT: entry:
; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
; ENABLED-NEXT: [[TMP3:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP2]]
; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A1]], i32 1
; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[AARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[CARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
; ENABLED-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP0]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP5]]
; ENABLED-NEXT: store <2 x double> [[TMP6]], ptr [[SARRAY:%.*]], align 8
; ENABLED-NEXT: ret void
;
Expand Down Expand Up @@ -54,19 +48,13 @@ entry:
define void @test_supernode_addsub(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
; ENABLED-LABEL: @test_supernode_addsub(
; ENABLED-NEXT: entry:
; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
; ENABLED-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP0]]
; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A1]], i32 1
; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[AARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[CARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP1]]
; ENABLED-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP0]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP5]]
; ENABLED-NEXT: store <2 x double> [[TMP6]], ptr [[SARRAY:%.*]], align 8
; ENABLED-NEXT: ret void
;
Expand Down Expand Up @@ -102,22 +90,16 @@ entry:
define void @test_supernode_addsub_alt(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
; ENABLED-LABEL: @test_supernode_addsub_alt(
; ENABLED-NEXT: entry:
; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
; ENABLED-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP0]]
; ENABLED-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP0]]
; ENABLED-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; ENABLED-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A1]], i32 1
; ENABLED-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
; ENABLED-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]]
; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[AARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[CARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP1]]
; ENABLED-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP1]]
; ENABLED-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP0]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP7]]
; ENABLED-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP7]]
; ENABLED-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: store <2 x double> [[TMP10]], ptr [[SARRAY:%.*]], align 8
; ENABLED-NEXT: ret void
Expand Down Expand Up @@ -177,18 +159,15 @@ entry:
define void @supernode_scheduling(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Darray, ptr %Sarray) {
; ENABLED-LABEL: @supernode_scheduling(
; ENABLED-NEXT: entry:
; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
; ENABLED-NEXT: [[C:%.*]] = load double, ptr [[CARRAY:%.*]], align 8
; ENABLED-NEXT: [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
; ENABLED-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
; ENABLED-NEXT: [[D:%.*]] = load double, ptr [[DARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[AARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B1]], i32 1
; ENABLED-NEXT: [[TMP3:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP2]]
; ENABLED-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[D]], i32 1
; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]]
; ENABLED-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
; ENABLED-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
; ENABLED-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP0]], [[TMP3]]
; ENABLED-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP1]], double [[D]], i32 1
; ENABLED-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP5]]
; ENABLED-NEXT: store <2 x double> [[TMP6]], ptr [[SARRAY:%.*]], align 8
; ENABLED-NEXT: ret void
;
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
define void @vec3_vectorize_call(ptr %Colour, float %0) {
; NON-POW2-LABEL: @vec3_vectorize_call(
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: [[TMP1:%.*]] = load float, ptr [[COLOUR:%.*]], align 4
; NON-POW2-NEXT: [[ARRAYIDX91_I:%.*]] = getelementptr float, ptr [[COLOUR]], i64 1
; NON-POW2-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX91_I]], align 4
; NON-POW2-NEXT: [[TMP3:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 2
; NON-POW2-NEXT: [[TMP4:%.*]] = insertelement <3 x float> [[TMP3]], float [[TMP1]], i32 0
; NON-POW2-NEXT: [[TMP5:%.*]] = insertelement <3 x float> [[TMP4]], float [[TMP2]], i32 1
; NON-POW2-NEXT: [[TMP6:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP5]], <3 x float> zeroinitializer, <3 x float> zeroinitializer)
; NON-POW2-NEXT: store <3 x float> [[TMP6]], ptr [[COLOUR]], align 4
; NON-POW2-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4
; NON-POW2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 2
; NON-POW2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
; NON-POW2-NEXT: [[TMP4:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> [[TMP3]], <3 x i32> <i32 3, i32 4, i32 2>
; NON-POW2-NEXT: [[TMP5:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP4]], <3 x float> zeroinitializer, <3 x float> zeroinitializer)
; NON-POW2-NEXT: store <3 x float> [[TMP5]], ptr [[COLOUR]], align 4
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: @vec3_vectorize_call(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 7
; CHECK-NEXT: [[T5:%.*]] = load i32, ptr [[T4]], align 4
; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 1
; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4
; CHECK-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 6
; CHECK-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4
; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 2
; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4
; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 5
; CHECK-NEXT: [[T17:%.*]] = load i32, ptr [[T16]], align 4
; CHECK-NEXT: [[T20:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 3
Expand All @@ -27,10 +24,6 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], [[T21]]
; CHECK-NEXT: [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]]
; CHECK-NEXT: [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]]
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]]
; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]]
; CHECK-NEXT: [[T39:%.*]] = add nsw i32 [[T37]], [[T38]]
Expand All @@ -39,18 +32,26 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[T15]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[T40]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T27]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T47]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 6270, i32 poison>, i32 [[T9]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T48]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T40]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 poison, i32 3>
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[T40]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T27]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T47]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> <i32 poison, i32 poison, i32 6270, i32 poison>, <4 x i32> [[TMP8]], <4 x i32> <i32 4, i32 poison, i32 2, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[T48]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[T40]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP7]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 poison, i32 3>
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP15]], i32 [[T34]], i32 6
; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4
; CHECK-NEXT: ret void
Expand Down
39 changes: 20 additions & 19 deletions llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 7
; CHECK-NEXT: [[T5:%.*]] = load i32, ptr [[T4]], align 4
; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 1
; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4
; CHECK-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 6
; CHECK-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4
; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 2
; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4
; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 5
; CHECK-NEXT: [[T17:%.*]] = load i32, ptr [[T16]], align 4
; CHECK-NEXT: [[T20:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 3
Expand All @@ -27,10 +24,6 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], [[T21]]
; CHECK-NEXT: [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]]
; CHECK-NEXT: [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]]
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]]
; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]]
; CHECK-NEXT: [[T39:%.*]] = add nsw i32 [[T37]], [[T38]]
Expand All @@ -39,18 +32,26 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[T15]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[T40]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T27]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T47]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 6270, i32 poison>, i32 [[T9]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T48]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T40]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 poison, i32 3>
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[T40]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T27]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T47]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> <i32 poison, i32 poison, i32 6270, i32 poison>, <4 x i32> [[TMP8]], <4 x i32> <i32 4, i32 poison, i32 2, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[T48]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[T40]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP7]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 poison, i32 3>
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP15]], i32 [[T34]], i32 6
; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4
; CHECK-NEXT: ret void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 7
; CHECK-NEXT: [[T5:%.*]] = load i32, ptr [[T4]], align 4
; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 1
; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4
; CHECK-NEXT: [[T10:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 6
; CHECK-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4
; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 2
; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4
; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 5
; CHECK-NEXT: [[T17:%.*]] = load i32, ptr [[T16]], align 4
; CHECK-NEXT: [[T20:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 3
Expand All @@ -21,11 +18,7 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], [[T21]]
; CHECK-NEXT: [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]]
; CHECK-NEXT: [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]]
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]]
; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]]
; CHECK-NEXT: [[T39:%.*]] = add nsw i32 [[T37]], [[T38]]
Expand All @@ -35,16 +28,22 @@ define void @test(ptr nocapture %t2) {
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T15]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T40]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T9]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T48]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[T40]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T48]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <2 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[T32]], i32 2
; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
Expand Down
25 changes: 13 additions & 12 deletions llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,21 @@ define void @test(ptr %a, ptr %b) {
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[ARRAYIDX120:%.*]] = getelementptr [4 x float], ptr [[B:%.*]], i64 0, i64 3
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[ARRAYIDX120]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 4, i32 5, i32 poison, i32 3>
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP11]], zeroinitializer
; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[RESULT]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2
; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], zeroinitializer
; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[RESULT]], align 4
; CHECK-NEXT: br label [[FOR_BODY]]
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
define void @test(ptr %isec) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[ISEC:%.*]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ISEC]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[ISEC]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[ISEC:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[ISEC]], align 8
; CHECK-NEXT: ret void
;
entry:
Expand Down