8 changes: 4 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,11 @@ define void @vecload_vs_broadcast5(double * noalias %from, double * noalias %to,
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 4
; CHECK-NEXT: br i1 undef, label [[LP]], label [[EXT:%.*]]
; CHECK: ext:
; CHECK-NEXT: ret void
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/SLPVectorizer/X86/phi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ define float @foo3(float* nocapture readonly %A) #0 {
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[SHUFFLE]], [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00
; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]]
Expand All @@ -163,14 +163,14 @@ define float @foo3(float* nocapture readonly %A) #0 {
; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>*
; CHECK-NEXT: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP11]] = extractelement <2 x float> [[SHUFFLE1]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, float [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13]] = extractelement <2 x float> [[SHUFFLE1]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP13]], i32 1
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP8]], i32 2
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP4]], i32 3
; CHECK-NEXT: [[TMP17:%.*]] = fmul <4 x float> [[TMP16]], <float 1.100000e+01, float 1.000000e+01, float 9.000000e+00, float 8.000000e+00>
; CHECK-NEXT: [[TMP18]] = fadd <4 x float> [[TMP6]], [[TMP17]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[SHUFFLE1]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP11]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP8]], i32 2
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP4]], i32 3
; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], <float 1.100000e+01, float 1.000000e+01, float 9.000000e+00, float 8.000000e+00>
; CHECK-NEXT: [[TMP16]] = extractelement <2 x float> [[SHUFFLE1]], i32 1
; CHECK-NEXT: [[TMP17]] = extractelement <2 x float> [[SHUFFLE1]], i32 0
; CHECK-NEXT: [[TMP18]] = fadd <4 x float> [[TMP6]], [[TMP15]]
; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP19]], 121
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@ define <2 x float> @foo() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SOURCE:%.*]] = insertelement <2 x float> poison, float undef, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = fsub <2 x float> [[SOURCE]], [[SOURCE]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <2 x float> [[RES1]], float [[TMP2]], i32 1
; CHECK-NEXT: ret <2 x float> [[RES2]]
; CHECK-NEXT: ret <2 x float> [[TMP0]]
;
entry:
%source = insertelement <2 x float> poison, float undef, i32 0
Expand Down
6 changes: 1 addition & 5 deletions llvm/test/Transforms/SLPVectorizer/X86/pr31599.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@ define <2 x float> @foo() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SOURCE:%.*]] = insertelement <2 x float> undef, float undef, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = fsub <2 x float> [[SOURCE]], [[SOURCE]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <2 x float> [[RES1]], float [[TMP2]], i32 1
; CHECK-NEXT: ret <2 x float> [[RES2]]
; CHECK-NEXT: ret <2 x float> [[TMP0]]
;
entry:
%source = insertelement <2 x float> undef, float undef, i32 0
Expand Down
30 changes: 12 additions & 18 deletions llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,12 @@ entry:
define void @test1_vec(float %a, float %b, float %c, float %d, <4 x i32>* nocapture %p) {
; CHECK-LABEL: @test1_vec(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[A:%.*]] to i32
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> undef, i32 [[CONV]], i32 0
; CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[B:%.*]] to i32
; CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[CONV1]], i32 1
; CHECK-NEXT: [[CONV3:%.*]] = fptosi float [[C:%.*]] to i32
; CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <4 x i32> [[VECINIT2]], i32 [[CONV3]], i32 2
; CHECK-NEXT: [[CONV5:%.*]] = fptosi float [[D:%.*]] to i32
; CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT4]], i32 [[CONV5]], i32 3
; CHECK-NEXT: store <4 x i32> [[VECINIT6]], <4 x i32>* [[P:%.*]], align 16, !tbaa [[TBAA0]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C:%.*]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D:%.*]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i32>
; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[P:%.*]], align 16, !tbaa [[TBAA0]]
; CHECK-NEXT: ret void
;
entry:
Expand Down Expand Up @@ -84,15 +81,12 @@ entry:

define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, <4 x i32>* nocapture %4) {
; CHECK-LABEL: @test2_vec(
; CHECK-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP0:%.*]], 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP1:%.*]], 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP8]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP2:%.*]], 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP10]], i32 2
; CHECK-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP3:%.*]], 1
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i32 3
; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP4:%.*]], align 16, !tbaa [[TBAA0]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0:%.*]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP1:%.*]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP2:%.*]], i32 2
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3:%.*]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP4:%.*]], align 16, !tbaa [[TBAA0]]
; CHECK-NEXT: ret void
;
%6 = add nsw i32 %0, 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@ define <2 x float> @foo({{float, float}}* %A) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast { { float, float } }* [[A:%.*]] to <2 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 2.000000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[INS0:%.*]] = insertelement <2 x float> [[INS1]], float [[TMP4]], i32 0
; CHECK-NEXT: ret <2 x float> [[INS0]]
; CHECK-NEXT: ret <2 x float> [[TMP2]]
;
entry:
%0 = bitcast {{float, float}}* %A to <2 x float>*
Expand Down
6 changes: 1 addition & 5 deletions llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@ define <2 x float> @foo({{float, float}}* %A) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast { { float, float } }* [[A:%.*]] to <2 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 2.000000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[INS0:%.*]] = insertelement <2 x float> [[INS1]], float [[TMP4]], i32 0
; CHECK-NEXT: ret <2 x float> [[INS0]]
; CHECK-NEXT: ret <2 x float> [[TMP2]]
;
entry:
%0 = bitcast {{float, float}}* %A to <2 x float>*
Expand Down
113 changes: 54 additions & 59 deletions llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -528,77 +528,72 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
;
; AVX-LABEL: @gather_load_div(
; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
; AVX-NEXT: [[TMP14:%.*]] = insertelement <8 x float*> [[TMP13]], float* [[TMP6]], i32 4
; AVX-NEXT: [[TMP15:%.*]] = insertelement <8 x float*> [[TMP14]], float* [[TMP7]], i32 5
; AVX-NEXT: [[TMP16:%.*]] = insertelement <8 x float*> [[TMP15]], float* [[TMP8]], i32 6
; AVX-NEXT: [[TMP17:%.*]] = insertelement <8 x float*> [[TMP16]], float* [[TMP9]], i32 7
; AVX-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP17]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP19:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX-NEXT: [[TMP20:%.*]] = getelementptr float, <8 x float*> [[TMP19]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP20]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP22:%.*]] = fdiv <8 x float> [[TMP18]], [[TMP21]]
; AVX-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX-NEXT: store <8 x float> [[TMP22]], <8 x float>* [[TMP23]], align 4, !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x float*> poison, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x float*> [[TMP4]], <2 x float*> undef, <2 x i32> zeroinitializer
; AVX-NEXT: [[TMP6:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 3, i64 14>
; AVX-NEXT: [[TMP7:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 17, i64 8>
; AVX-NEXT: [[TMP8:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 5, i64 20>
; AVX-NEXT: [[TMP9:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> [[TMP9]], float* [[TMP3]], i32 1
; AVX-NEXT: [[TMP11:%.*]] = shufflevector <2 x float*> [[TMP6]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: [[TMP12:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: [[TMP13:%.*]] = shufflevector <2 x float*> [[TMP7]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: [[TMP14:%.*]] = shufflevector <8 x float*> [[TMP12]], <8 x float*> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
; AVX-NEXT: [[TMP15:%.*]] = shufflevector <2 x float*> [[TMP8]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: [[TMP16:%.*]] = shufflevector <8 x float*> [[TMP14]], <8 x float*> [[TMP15]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX-NEXT: [[TMP17:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP16]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP18:%.*]] = shufflevector <8 x float*> [[TMP9]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX-NEXT: [[TMP19:%.*]] = getelementptr float, <8 x float*> [[TMP18]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX-NEXT: [[TMP20:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP19]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP21:%.*]] = fdiv <8 x float> [[TMP17]], [[TMP20]]
; AVX-NEXT: [[TMP22:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX-NEXT: store <8 x float> [[TMP21]], <8 x float>* [[TMP22]], align 4, !tbaa [[TBAA0]]
; AVX-NEXT: ret void
;
; AVX2-LABEL: @gather_load_div(
; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX2-NEXT: [[TMP4:%.*]] = insertelement <2 x float*> poison, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x float*> [[TMP4]], <2 x float*> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 3, i64 14>
; AVX2-NEXT: [[TMP7:%.*]] = insertelement <4 x float*> poison, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <4 x float*> [[TMP7]], <4 x float*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr float, <4 x float*> [[TMP8]], <4 x i64> <i64 17, i64 8, i64 5, i64 20>
; AVX2-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX2-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX2-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
; AVX2-NEXT: [[TMP14:%.*]] = insertelement <8 x float*> [[TMP13]], float* [[TMP6]], i32 4
; AVX2-NEXT: [[TMP15:%.*]] = insertelement <8 x float*> [[TMP14]], float* [[TMP7]], i32 5
; AVX2-NEXT: [[TMP16:%.*]] = insertelement <8 x float*> [[TMP15]], float* [[TMP8]], i32 6
; AVX2-NEXT: [[TMP17:%.*]] = insertelement <8 x float*> [[TMP16]], float* [[TMP9]], i32 7
; AVX2-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP17]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP19:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: [[TMP20:%.*]] = getelementptr float, <8 x float*> [[TMP19]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX2-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP20]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP22:%.*]] = fdiv <8 x float> [[TMP18]], [[TMP21]]
; AVX2-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX2-NEXT: store <8 x float> [[TMP22]], <8 x float>* [[TMP23]], align 4, !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP12:%.*]] = shufflevector <2 x float*> [[TMP6]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX2-NEXT: [[TMP13:%.*]] = shufflevector <8 x float*> [[TMP11]], <8 x float*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX2-NEXT: [[TMP14:%.*]] = shufflevector <4 x float*> [[TMP9]], <4 x float*> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX2-NEXT: [[TMP15:%.*]] = shufflevector <8 x float*> [[TMP13]], <8 x float*> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX2-NEXT: [[TMP16:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP15]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP17:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: [[TMP18:%.*]] = getelementptr float, <8 x float*> [[TMP17]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX2-NEXT: [[TMP19:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP18]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP20:%.*]] = fdiv <8 x float> [[TMP16]], [[TMP19]]
; AVX2-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX2-NEXT: store <8 x float> [[TMP20]], <8 x float>* [[TMP21]], align 4, !tbaa [[TBAA0]]
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @gather_load_div(
; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX512-NEXT: [[TMP4:%.*]] = insertelement <2 x float*> poison, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x float*> [[TMP4]], <2 x float*> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 3, i64 14>
; AVX512-NEXT: [[TMP7:%.*]] = insertelement <4 x float*> poison, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP8:%.*]] = shufflevector <4 x float*> [[TMP7]], <4 x float*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr float, <4 x float*> [[TMP8]], <4 x i64> <i64 17, i64 8, i64 5, i64 20>
; AVX512-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX512-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX512-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
; AVX512-NEXT: [[TMP14:%.*]] = insertelement <8 x float*> [[TMP13]], float* [[TMP6]], i32 4
; AVX512-NEXT: [[TMP15:%.*]] = insertelement <8 x float*> [[TMP14]], float* [[TMP7]], i32 5
; AVX512-NEXT: [[TMP16:%.*]] = insertelement <8 x float*> [[TMP15]], float* [[TMP8]], i32 6
; AVX512-NEXT: [[TMP17:%.*]] = insertelement <8 x float*> [[TMP16]], float* [[TMP9]], i32 7
; AVX512-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP17]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP19:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: [[TMP20:%.*]] = getelementptr float, <8 x float*> [[TMP19]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX512-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP20]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP22:%.*]] = fdiv <8 x float> [[TMP18]], [[TMP21]]
; AVX512-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX512-NEXT: store <8 x float> [[TMP22]], <8 x float>* [[TMP23]], align 4, !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP12:%.*]] = shufflevector <2 x float*> [[TMP6]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX512-NEXT: [[TMP13:%.*]] = shufflevector <8 x float*> [[TMP11]], <8 x float*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX512-NEXT: [[TMP14:%.*]] = shufflevector <4 x float*> [[TMP9]], <4 x float*> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX512-NEXT: [[TMP15:%.*]] = shufflevector <8 x float*> [[TMP13]], <8 x float*> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX512-NEXT: [[TMP16:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP15]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP17:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr float, <8 x float*> [[TMP17]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX512-NEXT: [[TMP19:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP18]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP20:%.*]] = fdiv <8 x float> [[TMP16]], [[TMP19]]
; AVX512-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX512-NEXT: store <8 x float> [[TMP20]], <8 x float>* [[TMP21]], align 4, !tbaa [[TBAA0]]
; AVX512-NEXT: ret void
;
%3 = load float, float* %1, align 4, !tbaa !2
Expand Down
113 changes: 54 additions & 59 deletions llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
Original file line number Diff line number Diff line change
Expand Up @@ -528,77 +528,72 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
;
; AVX-LABEL: @gather_load_div(
; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
; AVX-NEXT: [[TMP14:%.*]] = insertelement <8 x float*> [[TMP13]], float* [[TMP6]], i32 4
; AVX-NEXT: [[TMP15:%.*]] = insertelement <8 x float*> [[TMP14]], float* [[TMP7]], i32 5
; AVX-NEXT: [[TMP16:%.*]] = insertelement <8 x float*> [[TMP15]], float* [[TMP8]], i32 6
; AVX-NEXT: [[TMP17:%.*]] = insertelement <8 x float*> [[TMP16]], float* [[TMP9]], i32 7
; AVX-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP17]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP19:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX-NEXT: [[TMP20:%.*]] = getelementptr float, <8 x float*> [[TMP19]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP20]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP22:%.*]] = fdiv <8 x float> [[TMP18]], [[TMP21]]
; AVX-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX-NEXT: store <8 x float> [[TMP22]], <8 x float>* [[TMP23]], align 4, !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x float*> poison, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x float*> [[TMP4]], <2 x float*> undef, <2 x i32> zeroinitializer
; AVX-NEXT: [[TMP6:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 3, i64 14>
; AVX-NEXT: [[TMP7:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 17, i64 8>
; AVX-NEXT: [[TMP8:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 5, i64 20>
; AVX-NEXT: [[TMP9:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> [[TMP9]], float* [[TMP3]], i32 1
; AVX-NEXT: [[TMP11:%.*]] = shufflevector <2 x float*> [[TMP6]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: [[TMP12:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: [[TMP13:%.*]] = shufflevector <2 x float*> [[TMP7]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: [[TMP14:%.*]] = shufflevector <8 x float*> [[TMP12]], <8 x float*> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
; AVX-NEXT: [[TMP15:%.*]] = shufflevector <2 x float*> [[TMP8]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT: [[TMP16:%.*]] = shufflevector <8 x float*> [[TMP14]], <8 x float*> [[TMP15]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX-NEXT: [[TMP17:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP16]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP18:%.*]] = shufflevector <8 x float*> [[TMP9]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX-NEXT: [[TMP19:%.*]] = getelementptr float, <8 x float*> [[TMP18]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX-NEXT: [[TMP20:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP19]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX-NEXT: [[TMP21:%.*]] = fdiv <8 x float> [[TMP17]], [[TMP20]]
; AVX-NEXT: [[TMP22:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX-NEXT: store <8 x float> [[TMP21]], <8 x float>* [[TMP22]], align 4, !tbaa [[TBAA0]]
; AVX-NEXT: ret void
;
; AVX2-LABEL: @gather_load_div(
; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX2-NEXT: [[TMP4:%.*]] = insertelement <2 x float*> poison, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x float*> [[TMP4]], <2 x float*> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 3, i64 14>
; AVX2-NEXT: [[TMP7:%.*]] = insertelement <4 x float*> poison, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <4 x float*> [[TMP7]], <4 x float*> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: [[TMP9:%.*]] = getelementptr float, <4 x float*> [[TMP8]], <4 x i64> <i64 17, i64 8, i64 5, i64 20>
; AVX2-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX2-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX2-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX2-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
; AVX2-NEXT: [[TMP14:%.*]] = insertelement <8 x float*> [[TMP13]], float* [[TMP6]], i32 4
; AVX2-NEXT: [[TMP15:%.*]] = insertelement <8 x float*> [[TMP14]], float* [[TMP7]], i32 5
; AVX2-NEXT: [[TMP16:%.*]] = insertelement <8 x float*> [[TMP15]], float* [[TMP8]], i32 6
; AVX2-NEXT: [[TMP17:%.*]] = insertelement <8 x float*> [[TMP16]], float* [[TMP9]], i32 7
; AVX2-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP17]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP19:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: [[TMP20:%.*]] = getelementptr float, <8 x float*> [[TMP19]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX2-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP20]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP22:%.*]] = fdiv <8 x float> [[TMP18]], [[TMP21]]
; AVX2-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX2-NEXT: store <8 x float> [[TMP22]], <8 x float>* [[TMP23]], align 4, !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP12:%.*]] = shufflevector <2 x float*> [[TMP6]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX2-NEXT: [[TMP13:%.*]] = shufflevector <8 x float*> [[TMP11]], <8 x float*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX2-NEXT: [[TMP14:%.*]] = shufflevector <4 x float*> [[TMP9]], <4 x float*> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX2-NEXT: [[TMP15:%.*]] = shufflevector <8 x float*> [[TMP13]], <8 x float*> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX2-NEXT: [[TMP16:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP15]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP17:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: [[TMP18:%.*]] = getelementptr float, <8 x float*> [[TMP17]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX2-NEXT: [[TMP19:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP18]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX2-NEXT: [[TMP20:%.*]] = fdiv <8 x float> [[TMP16]], [[TMP19]]
; AVX2-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX2-NEXT: store <8 x float> [[TMP20]], <8 x float>* [[TMP21]], align 4, !tbaa [[TBAA0]]
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @gather_load_div(
; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
; AVX512-NEXT: [[TMP4:%.*]] = insertelement <2 x float*> poison, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x float*> [[TMP4]], <2 x float*> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 3, i64 14>
; AVX512-NEXT: [[TMP7:%.*]] = insertelement <4 x float*> poison, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP8:%.*]] = shufflevector <4 x float*> [[TMP7]], <4 x float*> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr float, <4 x float*> [[TMP8]], <4 x i64> <i64 17, i64 8, i64 5, i64 20>
; AVX512-NEXT: [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
; AVX512-NEXT: [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
; AVX512-NEXT: [[TMP12:%.*]] = insertelement <8 x float*> [[TMP11]], float* [[TMP4]], i32 2
; AVX512-NEXT: [[TMP13:%.*]] = insertelement <8 x float*> [[TMP12]], float* [[TMP5]], i32 3
; AVX512-NEXT: [[TMP14:%.*]] = insertelement <8 x float*> [[TMP13]], float* [[TMP6]], i32 4
; AVX512-NEXT: [[TMP15:%.*]] = insertelement <8 x float*> [[TMP14]], float* [[TMP7]], i32 5
; AVX512-NEXT: [[TMP16:%.*]] = insertelement <8 x float*> [[TMP15]], float* [[TMP8]], i32 6
; AVX512-NEXT: [[TMP17:%.*]] = insertelement <8 x float*> [[TMP16]], float* [[TMP9]], i32 7
; AVX512-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP17]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP19:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: [[TMP20:%.*]] = getelementptr float, <8 x float*> [[TMP19]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX512-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP20]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP22:%.*]] = fdiv <8 x float> [[TMP18]], [[TMP21]]
; AVX512-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX512-NEXT: store <8 x float> [[TMP22]], <8 x float>* [[TMP23]], align 4, !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP12:%.*]] = shufflevector <2 x float*> [[TMP6]], <2 x float*> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX512-NEXT: [[TMP13:%.*]] = shufflevector <8 x float*> [[TMP11]], <8 x float*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX512-NEXT: [[TMP14:%.*]] = shufflevector <4 x float*> [[TMP9]], <4 x float*> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX512-NEXT: [[TMP15:%.*]] = shufflevector <8 x float*> [[TMP13]], <8 x float*> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX512-NEXT: [[TMP16:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP15]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP17:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr float, <8 x float*> [[TMP17]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
; AVX512-NEXT: [[TMP19:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP18]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
; AVX512-NEXT: [[TMP20:%.*]] = fdiv <8 x float> [[TMP16]], [[TMP19]]
; AVX512-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
; AVX512-NEXT: store <8 x float> [[TMP20]], <8 x float>* [[TMP21]], align 4, !tbaa [[TBAA0]]
; AVX512-NEXT: ret void
;
%3 = load float, float* %1, align 4, !tbaa !2
Expand Down
85 changes: 36 additions & 49 deletions llvm/test/Transforms/SLPVectorizer/X86/resched.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,70 +12,57 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1
; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 0
; CHECK-NEXT: [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1
; CHECK-NEXT: [[ARRAYIDX_I_I7_1_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 1
; CHECK-NEXT: [[SHR_1_I_I:%.*]] = lshr i32 [[CONV31_I]], 2
; CHECK-NEXT: [[ARRAYIDX_I_I7_2_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 2
; CHECK-NEXT: [[SHR_2_I_I:%.*]] = lshr i32 [[CONV31_I]], 3
; CHECK-NEXT: [[ARRAYIDX_I_I7_3_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 3
; CHECK-NEXT: [[ARRAYIDX_I_I7_4_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 4
; CHECK-NEXT: [[ARRAYIDX_I_I7_5_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 5
; CHECK-NEXT: [[ARRAYIDX_I_I7_6_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 6
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[ARRAYIDX_I_I7_7_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 7
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[CONV31_I]], i32 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[CONV31_I]], i32 5
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[CONV31_I]], i32 6
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[CONV31_I]], i32 7
; CHECK-NEXT: [[TMP9:%.*]] = lshr <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT: [[ARRAYIDX_I_I7_8_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 8
; CHECK-NEXT: [[ARRAYIDX_I_I7_9_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 9
; CHECK-NEXT: [[ARRAYIDX_I_I7_10_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 10
; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP4]], <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[ARRAYIDX_I_I7_11_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 11
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[CONV31_I]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[CONV31_I]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3
; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP13]], <i32 9, i32 10, i32 11, i32 12>
; CHECK-NEXT: [[ARRAYIDX_I_I7_12_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12
; CHECK-NEXT: [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13
; CHECK-NEXT: [[ARRAYIDX_I_I7_13_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13
; CHECK-NEXT: [[SHR_13_I_I:%.*]] = lshr i32 [[CONV31_I]], 14
; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14
; CHECK-NEXT: [[SHR_14_I_I:%.*]] = lshr i32 [[CONV31_I]], 15
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP9]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP16]], i32 1
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP9]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP18]], i32 2
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP9]], i32 2
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP20]], i32 3
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP9]], i32 3
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP22]], i32 4
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP9]], i32 4
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP24]], i32 5
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP9]], i32 5
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x i32> [[TMP25]], i32 [[TMP26]], i32 6
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP9]], i32 6
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x i32> [[TMP27]], i32 [[TMP28]], i32 7
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i32> [[TMP9]], i32 7
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i32> [[TMP29]], i32 [[TMP30]], i32 8
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP14]], i32 0
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <16 x i32> [[TMP31]], i32 [[TMP32]], i32 9
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i32> [[TMP14]], i32 1
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i32> [[TMP33]], i32 [[TMP34]], i32 10
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i32> [[TMP14]], i32 2
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x i32> [[TMP35]], i32 [[TMP36]], i32 11
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i32> [[TMP14]], i32 3
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <16 x i32> [[TMP37]], i32 [[TMP38]], i32 12
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x i32> [[TMP39]], i32 [[SHR_12_I_I]], i32 13
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[SHR_13_I_I]], i32 14
; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP41]], i32 [[SHR_14_I_I]], i32 15
; CHECK-NEXT: [[TMP43:%.*]] = trunc <16 x i32> [[TMP42]] to <16 x i8>
; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> [[TMP43]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i32> [[TMP4]], <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_I_I]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SHR_1_I_I]], i32 2
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[SHR_2_I_I]], i32 3
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i32> [[TMP12]], <16 x i32> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i32> [[TMP14]], <16 x i32> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i32> [[TMP16]], <16 x i32> [[TMP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT: [[TMP19:%.*]] = trunc <16 x i32> [[TMP18]] to <16 x i8>
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP31:%.*]] = and <16 x i8> [[TMP19]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15
; CHECK-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP44]], <16 x i8>* [[TMP45]], align 1
; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP31]], <16 x i8>* [[TMP32]], align 1
; CHECK-NEXT: unreachable
; CHECK: if.end50.i:
; CHECK-NEXT: ret void
Expand Down
709 changes: 129 additions & 580 deletions llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll

Large diffs are not rendered by default.

709 changes: 129 additions & 580 deletions llvm/test/Transforms/SLPVectorizer/X86/sext.ll

Large diffs are not rendered by default.

20 changes: 2 additions & 18 deletions llvm/test/Transforms/SLPVectorizer/X86/sign-extend-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,7 @@ define <4 x i32> @sign_extend_v_v(<4 x i16> %lhs) {
; CHECK-LABEL: @sign_extend_v_v(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i16> [[LHS:%.*]] to <4 x i32>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
; CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT3]], i32 [[TMP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; CHECK-NEXT: [[VECINIT9:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x i32> [[VECINIT9]]
; CHECK-NEXT: ret <4 x i32> [[TMP0]]
;
entry:
%vecext = extractelement <4 x i16> %lhs, i32 0
Expand All @@ -35,15 +27,7 @@ define <4 x i16> @truncate_v_v(<4 x i32> %lhs) {
; CHECK-LABEL: @truncate_v_v(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[LHS:%.*]] to <4 x i16>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x i16> [[VECINIT]], i16 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
; CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i16> [[VECINIT3]], i16 [[TMP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3
; CHECK-NEXT: [[VECINIT9:%.*]] = insertelement <4 x i16> [[VECINIT6]], i16 [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x i16> [[VECINIT9]]
; CHECK-NEXT: ret <4 x i16> [[TMP0]]
;
entry:
%vecext = extractelement <4 x i32> %lhs, i32 0
Expand Down
20 changes: 2 additions & 18 deletions llvm/test/Transforms/SLPVectorizer/X86/sign-extend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,7 @@ define <4 x i32> @sign_extend_v_v(<4 x i16> %lhs) {
; CHECK-LABEL: @sign_extend_v_v(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i16> [[LHS:%.*]] to <4 x i32>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
; CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT3]], i32 [[TMP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; CHECK-NEXT: [[VECINIT9:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x i32> [[VECINIT9]]
; CHECK-NEXT: ret <4 x i32> [[TMP0]]
;
entry:
%vecext = extractelement <4 x i16> %lhs, i32 0
Expand All @@ -35,15 +27,7 @@ define <4 x i16> @truncate_v_v(<4 x i32> %lhs) {
; CHECK-LABEL: @truncate_v_v(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[LHS:%.*]] to <4 x i16>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i16> undef, i16 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x i16> [[VECINIT]], i16 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
; CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <4 x i16> [[VECINIT3]], i16 [[TMP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3
; CHECK-NEXT: [[VECINIT9:%.*]] = insertelement <4 x i16> [[VECINIT6]], i16 [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x i16> [[VECINIT9]]
; CHECK-NEXT: ret <4 x i16> [[TMP0]]
;
entry:
%vecext = extractelement <4 x i32> %lhs, i32 0
Expand Down
56 changes: 36 additions & 20 deletions llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1283,16 +1283,24 @@ define void @sitofp_16i8_16f32() #0 {
;

define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: @sitofp_4xi32_4f64(
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to double
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to double
; CHECK-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to double
; CHECK-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to double
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x double> poison, double [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x double> [[RES0]], double [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x double> [[RES1]], double [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x double> [[RES2]], double [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x double> [[RES3]]
; SSE-LABEL: @sitofp_4xi32_4f64(
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to double
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to double
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to double
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to double
; SSE-NEXT: [[RES0:%.*]] = insertelement <4 x double> poison, double [[CVT0]], i32 0
; SSE-NEXT: [[RES1:%.*]] = insertelement <4 x double> [[RES0]], double [[CVT1]], i32 1
; SSE-NEXT: [[RES2:%.*]] = insertelement <4 x double> [[RES1]], double [[CVT2]], i32 2
; SSE-NEXT: [[RES3:%.*]] = insertelement <4 x double> [[RES2]], double [[CVT3]], i32 3
; SSE-NEXT: ret <4 x double> [[RES3]]
;
; AVX-LABEL: @sitofp_4xi32_4f64(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
; AVX-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x double>
; AVX-NEXT: ret <4 x double> [[TMP5]]
;
%cvt0 = sitofp i32 %a0 to double
%cvt1 = sitofp i32 %a1 to double
Expand All @@ -1306,16 +1314,24 @@ define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
}

define <4 x float> @sitofp_4xi32_4f32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: @sitofp_4xi32_4f32(
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to float
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to float
; CHECK-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to float
; CHECK-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to float
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x float> poison, float [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x float> [[RES0]], float [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x float> [[RES1]], float [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x float> [[RES2]], float [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x float> [[RES3]]
; SSE-LABEL: @sitofp_4xi32_4f32(
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to float
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to float
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to float
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to float
; SSE-NEXT: [[RES0:%.*]] = insertelement <4 x float> poison, float [[CVT0]], i32 0
; SSE-NEXT: [[RES1:%.*]] = insertelement <4 x float> [[RES0]], float [[CVT1]], i32 1
; SSE-NEXT: [[RES2:%.*]] = insertelement <4 x float> [[RES1]], float [[CVT2]], i32 2
; SSE-NEXT: [[RES3:%.*]] = insertelement <4 x float> [[RES2]], float [[CVT3]], i32 3
; SSE-NEXT: ret <4 x float> [[RES3]]
;
; AVX-LABEL: @sitofp_4xi32_4f32(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
; AVX-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
; AVX-NEXT: ret <4 x float> [[TMP5]]
;
%cvt0 = sitofp i32 %a0 to float
%cvt1 = sitofp i32 %a1 to float
Expand Down
56 changes: 36 additions & 20 deletions llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1283,16 +1283,24 @@ define void @sitofp_16i8_16f32() #0 {
;

define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: @sitofp_4xi32_4f64(
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to double
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to double
; CHECK-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to double
; CHECK-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to double
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x double> undef, double [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x double> [[RES0]], double [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x double> [[RES1]], double [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x double> [[RES2]], double [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x double> [[RES3]]
; SSE-LABEL: @sitofp_4xi32_4f64(
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to double
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to double
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to double
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to double
; SSE-NEXT: [[RES0:%.*]] = insertelement <4 x double> undef, double [[CVT0]], i32 0
; SSE-NEXT: [[RES1:%.*]] = insertelement <4 x double> [[RES0]], double [[CVT1]], i32 1
; SSE-NEXT: [[RES2:%.*]] = insertelement <4 x double> [[RES1]], double [[CVT2]], i32 2
; SSE-NEXT: [[RES3:%.*]] = insertelement <4 x double> [[RES2]], double [[CVT3]], i32 3
; SSE-NEXT: ret <4 x double> [[RES3]]
;
; AVX-LABEL: @sitofp_4xi32_4f64(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
; AVX-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x double>
; AVX-NEXT: ret <4 x double> [[TMP5]]
;
%cvt0 = sitofp i32 %a0 to double
%cvt1 = sitofp i32 %a1 to double
Expand All @@ -1306,16 +1314,24 @@ define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
}

define <4 x float> @sitofp_4xi32_4f32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: @sitofp_4xi32_4f32(
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to float
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to float
; CHECK-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to float
; CHECK-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to float
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x float> undef, float [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x float> [[RES0]], float [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x float> [[RES1]], float [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x float> [[RES2]], float [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x float> [[RES3]]
; SSE-LABEL: @sitofp_4xi32_4f32(
; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[A0:%.*]] to float
; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[A1:%.*]] to float
; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[A2:%.*]] to float
; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[A3:%.*]] to float
; SSE-NEXT: [[RES0:%.*]] = insertelement <4 x float> undef, float [[CVT0]], i32 0
; SSE-NEXT: [[RES1:%.*]] = insertelement <4 x float> [[RES0]], float [[CVT1]], i32 1
; SSE-NEXT: [[RES2:%.*]] = insertelement <4 x float> [[RES1]], float [[CVT2]], i32 2
; SSE-NEXT: [[RES3:%.*]] = insertelement <4 x float> [[RES2]], float [[CVT3]], i32 3
; SSE-NEXT: ret <4 x float> [[RES3]]
;
; AVX-LABEL: @sitofp_4xi32_4f32(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A2:%.*]], i32 2
; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A3:%.*]], i32 3
; AVX-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
; AVX-NEXT: ret <4 x float> [[TMP5]]
;
%cvt0 = sitofp i32 %a0 to float
%cvt1 = sitofp i32 %a1 to float
Expand Down
30 changes: 13 additions & 17 deletions llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,30 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: bb279:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> poison, float undef, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float undef, i32 1
; CHECK-NEXT: br label [[BB283:%.*]]
; CHECK: bb283:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP13:%.*]], [[EXIT:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP1]], [[EXIT]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ undef, [[EXIT]] ]
; CHECK-NEXT: br label [[BB284:%.*]]
; CHECK: bb284:
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], undef
; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP5]], undef
; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
; CHECK-NEXT: br label [[BB21_I:%.*]]
; CHECK: bb21.i:
; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
; CHECK: bb22.i:
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> undef, [[TMP6]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
; CHECK-NEXT: br label [[BB32_I:%.*]]
; CHECK: bb32.i:
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x double> [ [[TMP7]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]]
; CHECK: exit:
; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double>
; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], <double undef, double 0.000000e+00>
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> undef, [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], undef
; CHECK-NEXT: [[TMP13]] = fptrunc <2 x double> [[TMP12]] to <2 x float>
; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double undef, double 0.000000e+00>
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef
; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
; CHECK-NEXT: br label [[BB283]]
;
bb279:
Expand Down Expand Up @@ -93,9 +91,7 @@ exit:
define <4 x double> @constant_folding() {
; CHECK-LABEL: @constant_folding(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> poison, double 1.000000e+00, i32 1
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
; CHECK-NEXT: ret <4 x double> [[I2]]
; CHECK-NEXT: ret <4 x double> <double 2.000000e+00, double 1.000000e+00, double poison, double poison>
;
entry:
%t0 = fadd double 1.000000e+00 , 0.000000e+00
Expand Down
30 changes: 13 additions & 17 deletions llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,30 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: bb279:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> poison, float undef, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float undef, i32 1
; CHECK-NEXT: br label [[BB283:%.*]]
; CHECK: bb283:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP13:%.*]], [[EXIT:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP1]], [[EXIT]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ undef, [[EXIT]] ]
; CHECK-NEXT: br label [[BB284:%.*]]
; CHECK: bb284:
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], undef
; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP5]], undef
; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
; CHECK-NEXT: br label [[BB21_I:%.*]]
; CHECK: bb21.i:
; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
; CHECK: bb22.i:
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> undef, [[TMP6]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
; CHECK-NEXT: br label [[BB32_I:%.*]]
; CHECK: bb32.i:
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x double> [ [[TMP7]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]]
; CHECK: exit:
; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double>
; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], <double undef, double 0.000000e+00>
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> undef, [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], undef
; CHECK-NEXT: [[TMP13]] = fptrunc <2 x double> [[TMP12]] to <2 x float>
; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double undef, double 0.000000e+00>
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef
; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
; CHECK-NEXT: br label [[BB283]]
;
bb279:
Expand Down Expand Up @@ -93,9 +91,7 @@ exit:
define <4 x double> @constant_folding() {
; CHECK-LABEL: @constant_folding(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
; CHECK-NEXT: ret <4 x double> [[I2]]
; CHECK-NEXT: ret <4 x double> <double 2.000000e+00, double 1.000000e+00, double undef, double undef>
;
entry:
%t0 = fadd double 1.000000e+00 , 0.000000e+00
Expand Down
570 changes: 72 additions & 498 deletions llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll

Large diffs are not rendered by default.

570 changes: 72 additions & 498 deletions llvm/test/Transforms/SLPVectorizer/X86/zext.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,7 @@ define <4 x float> @memread_4x(<4 x float>* %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vmemread(<4 x float> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
entry:
%0 = load <4 x float>, <4 x float>* %a, align 16
Expand Down
10 changes: 1 addition & 9 deletions llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,7 @@ define <4 x float> @memread_4x(<4 x float>* %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vmemread(<4 x float> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
entry:
%0 = load <4 x float>, <4 x float>* %a, align 16
Expand Down