298 changes: 185 additions & 113 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

120 changes: 44 additions & 76 deletions llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,40 +29,24 @@ define void @PR28330(i32 %n) {
; GATHER: for.body:
; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> poison, i1 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1
; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2
; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3
; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4
; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5
; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6
; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7
; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0
; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1
; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2
; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3
; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0
; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
; GATHER-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP22]], i32 3
; GATHER-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP23]], i32 4
; GATHER-NEXT: [[TMP31:%.*]] = insertelement <8 x i32> [[TMP30]], i32 [[TMP24]], i32 5
; GATHER-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6
; GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7
; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]]
; GATHER-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; GATHER-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; GATHER-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP3]])
; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP18]], [[P17]]
; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; GATHER-NEXT: br label [[FOR_BODY]]
;
; MAX-COST-LABEL: @PR28330(
Expand Down Expand Up @@ -165,40 +149,24 @@ define void @PR32038(i32 %n) {
; GATHER: for.body:
; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> poison, i1 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1
; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2
; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3
; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4
; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5
; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6
; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7
; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0
; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1
; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2
; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3
; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0
; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
; GATHER-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP22]], i32 3
; GATHER-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP23]], i32 4
; GATHER-NEXT: [[TMP31:%.*]] = insertelement <8 x i32> [[TMP30]], i32 [[TMP24]], i32 5
; GATHER-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6
; GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7
; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], -5
; GATHER-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; GATHER-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; GATHER-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP3]])
; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP18]], -5
; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; GATHER-NEXT: br label [[FOR_BODY]]
;
; MAX-COST-LABEL: @PR32038(
Expand All @@ -220,16 +188,16 @@ define void @PR32038(i32 %n) {
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; MAX-COST-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> poison, i1 [[TMP2]], i32 0
; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[TMP4]], i32 1
; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[P5]], i32 2
; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[P7]], i32 3
; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
; MAX-COST-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; MAX-COST-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> poison, <4 x i1> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; MAX-COST-NEXT: [[TMP4:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[P5]], i32 2
; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP4]], i1 [[P7]], i32 3
; MAX-COST-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
; MAX-COST-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; MAX-COST-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]]
; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]]
; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <2 x float> @insertelement-fixed-vector() {
; CHECK-LABEL: @insertelement-fixed-vector(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x float> [[I0]], float [[TMP3]], i32 1
; CHECK-NEXT: ret <2 x float> [[I1]]
; CHECK-NEXT: ret <2 x float> [[TMP1]]
;
%f0 = tail call fast float @llvm.fabs.f32(float undef)
%f1 = tail call fast float @llvm.fabs.f32(float undef)
Expand Down
6 changes: 1 addition & 5 deletions llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <2 x float> @insertelement-fixed-vector() {
; CHECK-LABEL: @insertelement-fixed-vector(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x float> [[I0]], float [[TMP3]], i32 1
; CHECK-NEXT: ret <2 x float> [[I1]]
; CHECK-NEXT: ret <2 x float> [[TMP1]]
;
%f0 = tail call fast float @llvm.fabs.f32(float undef)
%f1 = tail call fast float @llvm.fabs.f32(float undef)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ define <4 x i32> @build_vec_v4i32_reuse_0(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], [[TMP5]]
; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1
Expand Down Expand Up @@ -152,15 +152,15 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP1_1:%.*]] = sub i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2_0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1_0]], i32 0
; CHECK-NEXT: [[TMP2_1:%.*]] = insertelement <4 x i32> [[TMP2_0]], i32 [[TMP1_1]], i32 1
; CHECK-NEXT: [[TMP2_3:%.*]] = shufflevector <4 x i32> [[TMP2_1]], <4 x i32> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[TMP2_3]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]]
; CHECK-NEXT: [[TMP2_11:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2_32:%.*]] = shufflevector <4 x i32> [[TMP2_11]], <4 x i32> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[TMP2_32]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1
Expand Down Expand Up @@ -198,11 +198,11 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP2_1:%.*]] = add i32 [[TMP1_0]], [[TMP1_1]]
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2_0]], i32 0
; CHECK-NEXT: [[TMP3_1:%.*]] = insertelement <4 x i32> [[TMP3_0]], i32 [[TMP2_1]], i32 1
; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <4 x i32> [[TMP3_1]], <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3_31:%.*]] = shufflevector <4 x i32> [[TMP3_1]], <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[TMP3_31]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ define <4 x i32> @build_vec_v4i32_reuse_0(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], [[TMP5]]
; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1
Expand Down Expand Up @@ -152,15 +152,15 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[V0]], [[V1]]
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP1_1:%.*]] = sub i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2_0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1_0]], i32 0
; CHECK-NEXT: [[TMP2_1:%.*]] = insertelement <4 x i32> [[TMP2_0]], i32 [[TMP1_1]], i32 1
; CHECK-NEXT: [[TMP2_3:%.*]] = shufflevector <4 x i32> [[TMP2_1]], <4 x i32> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[TMP2_3]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]]
; CHECK-NEXT: [[TMP2_11:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2_32:%.*]] = shufflevector <4 x i32> [[TMP2_11]], <4 x i32> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[TMP2_32]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1
Expand Down Expand Up @@ -198,11 +198,11 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP2_1:%.*]] = add i32 [[TMP1_0]], [[TMP1_1]]
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2_0]], i32 0
; CHECK-NEXT: [[TMP3_1:%.*]] = insertelement <4 x i32> [[TMP3_0]], i32 [[TMP2_1]], i32 1
; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <4 x i32> [[TMP3_1]], <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3_31:%.*]] = shufflevector <4 x i32> [[TMP3_1]], <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[TMP3_31]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX8-NEXT: [[TMP3:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
; GFX8-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[TMP3]], i64 0
; GFX8-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
; GFX8-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[TMP4]], i64 1
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX8-NEXT: [[INS_11:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_11]], i16 [[ADD_2]], i64 2
; GFX8-NEXT: ret <3 x i16> [[INS_2]]
;
bb:
Expand Down Expand Up @@ -300,8 +297,8 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
; GFX8-NEXT: [[INS_3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
; GFX8-NEXT: [[INS_32:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT: ret <4 x i16> [[INS_32]]
;
bb:
%arg0.0 = extractelement <4 x i16> %arg0, i64 0
Expand Down
11 changes: 4 additions & 7 deletions llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX8-NEXT: [[TMP3:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
; GFX8-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> undef, i16 [[TMP3]], i64 0
; GFX8-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
; GFX8-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[TMP4]], i64 1
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX8-NEXT: [[INS_11:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_11]], i16 [[ADD_2]], i64 2
; GFX8-NEXT: ret <3 x i16> [[INS_2]]
;
bb:
Expand Down Expand Up @@ -300,8 +297,8 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
; GFX8-NEXT: [[INS_3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
; GFX8-NEXT: [[INS_32:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT: ret <4 x i16> [[INS_32]]
;
bb:
%arg0.0 = extractelement <4 x i16> %arg0, i64 0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
Expand All @@ -16,11 +17,7 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %arg) {
; GFX8-LABEL: @bswap_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[ARG:%.*]])
; GFX8-NEXT: [[TMP1:%.*]] = extractelement <2 x i16> [[TMP0]], i32 0
; GFX8-NEXT: [[T2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
; GFX8-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[TMP0]], i32 1
; GFX8-NEXT: [[T5:%.*]] = insertelement <2 x i16> [[T2]], i16 [[TMP2]], i64 1
; GFX8-NEXT: ret <2 x i16> [[T5]]
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%t = extractelement <2 x i16> %arg, i64 0
Expand Down
7 changes: 2 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/AMDGPU/bswap.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
Expand All @@ -16,11 +17,7 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %arg) {
; GFX8-LABEL: @bswap_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[ARG:%.*]])
; GFX8-NEXT: [[TMP1:%.*]] = extractelement <2 x i16> [[TMP0]], i32 0
; GFX8-NEXT: [[T2:%.*]] = insertelement <2 x i16> undef, i16 [[TMP1]], i64 0
; GFX8-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[TMP0]], i32 1
; GFX8-NEXT: [[T5:%.*]] = insertelement <2 x i16> [[T2]], i16 [[TMP2]], i64 1
; GFX8-NEXT: ret <2 x i16> [[T5]]
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%t = extractelement <2 x i16> %arg, i64 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@ define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1)
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[ARG1_1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> [[TMP2]], i16 [[ARG1_2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP1]], <2 x i16> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
; CHECK-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> undef, i16 [[TMP5]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
; CHECK-NEXT: [[INS_2:%.*]] = insertelement <2 x i16> [[INS_1]], i16 [[TMP6]], i64 1
; CHECK-NEXT: ret <2 x i16> [[INS_2]]
; CHECK-NEXT: ret <2 x i16> [[TMP4]]
;
bb:
%arg0.1 = extractelement <9 x i16> undef, i64 7
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
Expand All @@ -16,11 +17,7 @@ define <2 x half> @round_v2f16(<2 x half> %arg) {
; GFX8-LABEL: @round_v2f16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x half> @llvm.round.v2f16(<2 x half> [[ARG:%.*]])
; GFX8-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[TMP0]], i32 0
; GFX8-NEXT: [[T2:%.*]] = insertelement <2 x half> poison, half [[TMP1]], i64 0
; GFX8-NEXT: [[TMP2:%.*]] = extractelement <2 x half> [[TMP0]], i32 1
; GFX8-NEXT: [[T5:%.*]] = insertelement <2 x half> [[T2]], half [[TMP2]], i64 1
; GFX8-NEXT: ret <2 x half> [[T5]]
; GFX8-NEXT: ret <2 x half> [[TMP0]]
;
bb:
%t = extractelement <2 x half> %arg, i64 0
Expand Down
7 changes: 2 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/AMDGPU/round.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
Expand All @@ -16,11 +17,7 @@ define <2 x half> @round_v2f16(<2 x half> %arg) {
; GFX8-LABEL: @round_v2f16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x half> @llvm.round.v2f16(<2 x half> [[ARG:%.*]])
; GFX8-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[TMP0]], i32 0
; GFX8-NEXT: [[T2:%.*]] = insertelement <2 x half> undef, half [[TMP1]], i64 0
; GFX8-NEXT: [[TMP2:%.*]] = extractelement <2 x half> [[TMP0]], i32 1
; GFX8-NEXT: [[T5:%.*]] = insertelement <2 x half> [[T2]], half [[TMP2]], i64 1
; GFX8-NEXT: ret <2 x half> [[T5]]
; GFX8-NEXT: ret <2 x half> [[TMP0]]
;
bb:
%t = extractelement <2 x half> %arg, i64 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,7 @@
define <4 x i32> @PR13837(<4 x float> %in) {
; CHECK-LABEL: @PR13837(
; CHECK-NEXT: [[TMP1:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP5]], i32 3
; CHECK-NEXT: ret <4 x i32> [[V3]]
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%t0 = extractelement <4 x float> %in, i64 0
%t1 = extractelement <4 x float> %in, i64 1
Expand Down
10 changes: 1 addition & 9 deletions llvm/test/Transforms/SLPVectorizer/ARM/extract-insert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,7 @@
define <4 x i32> @PR13837(<4 x float> %in) {
; CHECK-LABEL: @PR13837(
; CHECK-NEXT: [[TMP1:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP5]], i32 3
; CHECK-NEXT: ret <4 x i32> [[V3]]
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%t0 = extractelement <4 x float> %in, i64 0
%t1 = extractelement <4 x float> %in, i64 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> poison, i32 [[TMP6]], i32 4
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP7]], i32 5
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; CHECK-NEXT: ret void
;
entry:
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 4
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP7]], i32 5
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; CHECK-NEXT: ret void
;
entry:
Expand Down
8 changes: 3 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ define void @Test(i32) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
Expand Down Expand Up @@ -45,11 +45,9 @@ define void @Test(i32) {
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
; CHECK-NEXT: [[TMP15]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP14]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
; CHECK-NEXT: br label [[LOOP]]
;
; FORCE_REDUCTION-LABEL: @Test(
Expand Down
301 changes: 87 additions & 214 deletions llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll

Large diffs are not rendered by default.

301 changes: 87 additions & 214 deletions llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @fadd_fsub_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[R7]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
Expand Down Expand Up @@ -49,58 +49,11 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
}

define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @fmul_fdiv_v8f32(
; SSE-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; SSE-NEXT: ret <8 x float> [[R7]]
;
; SLM-LABEL: @fmul_fdiv_v8f32(
; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
; SLM-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
; SLM-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
; SLM-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
; SLM-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
; SLM-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1
; SLM-NEXT: [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2
; SLM-NEXT: [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3
; SLM-NEXT: [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4
; SLM-NEXT: [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5
; SLM-NEXT: [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6
; SLM-NEXT: [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7
; SLM-NEXT: [[AB0:%.*]] = fmul float [[A0]], [[B0]]
; SLM-NEXT: [[AB1:%.*]] = fdiv float [[A1]], [[B1]]
; SLM-NEXT: [[AB2:%.*]] = fdiv float [[A2]], [[B2]]
; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], [[B3]]
; SLM-NEXT: [[AB4:%.*]] = fmul float [[A4]], [[B4]]
; SLM-NEXT: [[AB5:%.*]] = fdiv float [[A5]], [[B5]]
; SLM-NEXT: [[AB6:%.*]] = fdiv float [[A6]], [[B6]]
; SLM-NEXT: [[AB7:%.*]] = fmul float [[A7]], [[B7]]
; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i32 0
; SLM-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; SLM-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; SLM-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; SLM-NEXT: ret <8 x float> [[R7]]
;
; AVX-LABEL: @fmul_fdiv_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX-NEXT: ret <8 x float> [[R7]]
;
; AVX512-LABEL: @fmul_fdiv_v8f32(
; AVX512-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX512-NEXT: ret <8 x float> [[R7]]
; CHECK-LABEL: @fmul_fdiv_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
Expand Down Expand Up @@ -148,11 +101,8 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
; SLM-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; SLM-NEXT: [[R0:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; SLM-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[TMP4]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
; SLM-NEXT: [[R11:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R11]], float [[A2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
; SLM-NEXT: ret <4 x float> [[R3]]
;
Expand Down
68 changes: 9 additions & 59 deletions llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @fadd_fsub_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[R7]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
Expand Down Expand Up @@ -49,58 +49,11 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
}

define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @fmul_fdiv_v8f32(
; SSE-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; SSE-NEXT: ret <8 x float> [[R7]]
;
; SLM-LABEL: @fmul_fdiv_v8f32(
; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
; SLM-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
; SLM-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
; SLM-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
; SLM-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
; SLM-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i32 1
; SLM-NEXT: [[B2:%.*]] = extractelement <8 x float> [[B]], i32 2
; SLM-NEXT: [[B3:%.*]] = extractelement <8 x float> [[B]], i32 3
; SLM-NEXT: [[B4:%.*]] = extractelement <8 x float> [[B]], i32 4
; SLM-NEXT: [[B5:%.*]] = extractelement <8 x float> [[B]], i32 5
; SLM-NEXT: [[B6:%.*]] = extractelement <8 x float> [[B]], i32 6
; SLM-NEXT: [[B7:%.*]] = extractelement <8 x float> [[B]], i32 7
; SLM-NEXT: [[AB0:%.*]] = fmul float [[A0]], [[B0]]
; SLM-NEXT: [[AB1:%.*]] = fdiv float [[A1]], [[B1]]
; SLM-NEXT: [[AB2:%.*]] = fdiv float [[A2]], [[B2]]
; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], [[B3]]
; SLM-NEXT: [[AB4:%.*]] = fmul float [[A4]], [[B4]]
; SLM-NEXT: [[AB5:%.*]] = fdiv float [[A5]], [[B5]]
; SLM-NEXT: [[AB6:%.*]] = fdiv float [[A6]], [[B6]]
; SLM-NEXT: [[AB7:%.*]] = fmul float [[A7]], [[B7]]
; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0
; SLM-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; SLM-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; SLM-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; SLM-NEXT: ret <8 x float> [[R7]]
;
; AVX-LABEL: @fmul_fdiv_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX-NEXT: ret <8 x float> [[R7]]
;
; AVX512-LABEL: @fmul_fdiv_v8f32(
; AVX512-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX512-NEXT: ret <8 x float> [[R7]]
; CHECK-LABEL: @fmul_fdiv_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1
Expand Down Expand Up @@ -148,11 +101,8 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
; SLM-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; SLM-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; SLM-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[TMP4]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
; SLM-NEXT: [[R11:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R11]], float [[A2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
; SLM-NEXT: ret <4 x float> [[R3]]
;
Expand Down
224 changes: 107 additions & 117 deletions llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll

Large diffs are not rendered by default.

224 changes: 107 additions & 117 deletions llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll

Large diffs are not rendered by default.

498 changes: 59 additions & 439 deletions llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll

Large diffs are not rendered by default.

498 changes: 59 additions & 439 deletions llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,9 @@ define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {

define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @h(
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x i8> poison, i8 [[X0X0]], i32 0
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> [[INS1]], i8 [[X3X3]], i32 1
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
; CHECK-NEXT: ret <4 x i8> [[INS4]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret <4 x i8> [[TMP2]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
Expand All @@ -53,16 +43,9 @@ define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {

define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @h_undef(
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 3
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> <i8 undef, i8 poison, i8 poison, i8 poison>, i8 [[X3X3]], i32 1
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
; CHECK-NEXT: ret <4 x i8> [[INS4]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 3, i32 5, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret <4 x i8> [[TMP2]]
;
%x0 = extractelement <4 x i8> undef, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
Expand Down
29 changes: 6 additions & 23 deletions llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,9 @@ define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {

define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @h(
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x i8> undef, i8 [[X0X0]], i32 0
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> [[INS1]], i8 [[X3X3]], i32 1
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
; CHECK-NEXT: ret <4 x i8> [[INS4]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret <4 x i8> [[TMP2]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
Expand All @@ -53,16 +43,9 @@ define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) {

define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @h_undef(
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 3
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i8> undef, i8 [[X3X3]], i32 1
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
; CHECK-NEXT: [[INS4:%.*]] = insertelement <4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
; CHECK-NEXT: ret <4 x i8> [[INS4]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 3, i32 5, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret <4 x i8> [[TMP2]]
;
%x0 = extractelement <4 x i8> undef, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,7 @@ define void @hoge(i64 %idx, <4 x i32>* %sink) {
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP8]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP10]], i32 2
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i32 3
; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[SINK:%.*]], align 16
; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[SINK:%.*]], align 16
; CHECK-NEXT: ret void
;
bb:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,7 @@ define void @hoge(i64 %idx, <4 x i32>* %sink) {
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> undef, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP8]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP10]], i32 2
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i32 3
; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[SINK:%.*]], align 16
; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[SINK:%.*]], align 16
; CHECK-NEXT: ret void
;
bb:
Expand Down
30 changes: 12 additions & 18 deletions llvm/test/Transforms/SLPVectorizer/X86/fptosi-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -487,15 +487,12 @@ define void @fptosi_8f32_8i8() #0 {

define <4 x i32> @fptosi_4xf64_4i32(double %a0, double %a1, double %a2, double %a3) #0 {
; CHECK-LABEL: @fptosi_4xf64_4i32(
; CHECK-NEXT: [[CVT0:%.*]] = fptosi double [[A0:%.*]] to i32
; CHECK-NEXT: [[CVT1:%.*]] = fptosi double [[A1:%.*]] to i32
; CHECK-NEXT: [[CVT2:%.*]] = fptosi double [[A2:%.*]] to i32
; CHECK-NEXT: [[CVT3:%.*]] = fptosi double [[A3:%.*]] to i32
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x i32> poison, i32 [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x i32> [[RES0]], i32 [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x i32> [[RES1]], i32 [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x i32> [[RES2]], i32 [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[RES3]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[A2:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = fptosi <4 x double> [[TMP4]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP5]]
;
%cvt0 = fptosi double %a0 to i32
%cvt1 = fptosi double %a1 to i32
Expand All @@ -510,15 +507,12 @@ define <4 x i32> @fptosi_4xf64_4i32(double %a0, double %a1, double %a2, double %

define <4 x i32> @fptosi_4xf32_4i32(float %a0, float %a1, float %a2, float %a3) #0 {
; CHECK-LABEL: @fptosi_4xf32_4i32(
; CHECK-NEXT: [[CVT0:%.*]] = fptosi float [[A0:%.*]] to i32
; CHECK-NEXT: [[CVT1:%.*]] = fptosi float [[A1:%.*]] to i32
; CHECK-NEXT: [[CVT2:%.*]] = fptosi float [[A2:%.*]] to i32
; CHECK-NEXT: [[CVT3:%.*]] = fptosi float [[A3:%.*]] to i32
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x i32> poison, i32 [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x i32> [[RES0]], i32 [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x i32> [[RES1]], i32 [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x i32> [[RES2]], i32 [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[RES3]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[A2:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = fptosi <4 x float> [[TMP4]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP5]]
;
%cvt0 = fptosi float %a0 to i32
%cvt1 = fptosi float %a1 to i32
Expand Down
30 changes: 12 additions & 18 deletions llvm/test/Transforms/SLPVectorizer/X86/fptosi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -487,15 +487,12 @@ define void @fptosi_8f32_8i8() #0 {

define <4 x i32> @fptosi_4xf64_4i32(double %a0, double %a1, double %a2, double %a3) #0 {
; CHECK-LABEL: @fptosi_4xf64_4i32(
; CHECK-NEXT: [[CVT0:%.*]] = fptosi double [[A0:%.*]] to i32
; CHECK-NEXT: [[CVT1:%.*]] = fptosi double [[A1:%.*]] to i32
; CHECK-NEXT: [[CVT2:%.*]] = fptosi double [[A2:%.*]] to i32
; CHECK-NEXT: [[CVT3:%.*]] = fptosi double [[A3:%.*]] to i32
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x i32> undef, i32 [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x i32> [[RES0]], i32 [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x i32> [[RES1]], i32 [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x i32> [[RES2]], i32 [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[RES3]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[A2:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = fptosi <4 x double> [[TMP4]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP5]]
;
%cvt0 = fptosi double %a0 to i32
%cvt1 = fptosi double %a1 to i32
Expand All @@ -510,15 +507,12 @@ define <4 x i32> @fptosi_4xf64_4i32(double %a0, double %a1, double %a2, double %

define <4 x i32> @fptosi_4xf32_4i32(float %a0, float %a1, float %a2, float %a3) #0 {
; CHECK-LABEL: @fptosi_4xf32_4i32(
; CHECK-NEXT: [[CVT0:%.*]] = fptosi float [[A0:%.*]] to i32
; CHECK-NEXT: [[CVT1:%.*]] = fptosi float [[A1:%.*]] to i32
; CHECK-NEXT: [[CVT2:%.*]] = fptosi float [[A2:%.*]] to i32
; CHECK-NEXT: [[CVT3:%.*]] = fptosi float [[A3:%.*]] to i32
; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x i32> undef, i32 [[CVT0]], i32 0
; CHECK-NEXT: [[RES1:%.*]] = insertelement <4 x i32> [[RES0]], i32 [[CVT1]], i32 1
; CHECK-NEXT: [[RES2:%.*]] = insertelement <4 x i32> [[RES1]], i32 [[CVT2]], i32 2
; CHECK-NEXT: [[RES3:%.*]] = insertelement <4 x i32> [[RES2]], i32 [[CVT3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[RES3]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[A2:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = fptosi <4 x float> [[TMP4]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP5]]
;
%cvt0 = fptosi float %a0 to i32
%cvt1 = fptosi float %a1 to i32
Expand Down
64 changes: 19 additions & 45 deletions llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,11 @@
;

define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @test_v2f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT: ret <2 x double> [[TMP3]]
;
; SLM-LABEL: @test_v2f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
; SLM-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]]
; SLM-NEXT: [[R00:%.*]] = insertelement <2 x double> poison, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <2 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: ret <2 x double> [[R01]]
;
; AVX-LABEL: @test_v2f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <2 x double> [[TMP3]]
; CHECK-LABEL: @test_v2f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <2 x double> [[TMP3]]
;
%a0 = extractelement <2 x double> %a, i32 0
%a1 = extractelement <2 x double> %a, i32 1
Expand Down Expand Up @@ -169,27 +152,18 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R03]]
; SSE-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R032]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1
; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2
; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3
; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]]
; SLM-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]]
; SLM-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]]
; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i32 2
; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i32 3
; SLM-NEXT: ret <4 x double> [[R03]]
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: ret <4 x double> [[R032]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
Expand Down Expand Up @@ -230,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R07]]
; SLM-NEXT: [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R072]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
Expand Down Expand Up @@ -350,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT: [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[RV15:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV15]]
; SSE-NEXT: [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV152]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
Expand Down
64 changes: 19 additions & 45 deletions llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,11 @@
;

define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @test_v2f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT: ret <2 x double> [[TMP3]]
;
; SLM-LABEL: @test_v2f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
; SLM-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]]
; SLM-NEXT: [[R00:%.*]] = insertelement <2 x double> undef, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <2 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: ret <2 x double> [[R01]]
;
; AVX-LABEL: @test_v2f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <2 x double> [[TMP3]]
; CHECK-LABEL: @test_v2f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <2 x double> [[TMP3]]
;
%a0 = extractelement <2 x double> %a, i32 0
%a1 = extractelement <2 x double> %a, i32 1
Expand Down Expand Up @@ -169,27 +152,18 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R03]]
; SSE-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R032]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1
; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2
; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3
; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]]
; SLM-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]]
; SLM-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]]
; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i32 2
; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i32 3
; SLM-NEXT: ret <4 x double> [[R03]]
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: ret <4 x double> [[R032]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
Expand Down Expand Up @@ -230,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R07]]
; SLM-NEXT: [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R072]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
Expand Down Expand Up @@ -350,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT: [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[RV15:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV15]]
; SSE-NEXT: [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV152]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
Expand Down
64 changes: 19 additions & 45 deletions llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,11 @@
;

define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @test_v2f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT: ret <2 x double> [[TMP3]]
;
; SLM-LABEL: @test_v2f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
; SLM-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]]
; SLM-NEXT: [[R00:%.*]] = insertelement <2 x double> poison, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <2 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: ret <2 x double> [[R01]]
;
; AVX-LABEL: @test_v2f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; AVX-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <2 x double> [[TMP3]]
; CHECK-LABEL: @test_v2f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <2 x double> [[TMP3]]
;
%a0 = extractelement <2 x double> %a, i32 0
%a1 = extractelement <2 x double> %a, i32 1
Expand Down Expand Up @@ -169,27 +152,18 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R03]]
; SSE-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R032]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1
; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2
; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3
; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]]
; SLM-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]]
; SLM-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]]
; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i32 2
; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i32 3
; SLM-NEXT: ret <4 x double> [[R03]]
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: ret <4 x double> [[R032]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
Expand Down Expand Up @@ -230,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R07]]
; SLM-NEXT: [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R072]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
Expand Down Expand Up @@ -350,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[RV15:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV15]]
; SSE-NEXT: [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV152]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
Expand Down
64 changes: 19 additions & 45 deletions llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,11 @@
;

define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: @test_v2f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; SSE-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT: ret <2 x double> [[TMP3]]
;
; SLM-LABEL: @test_v2f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
; SLM-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]]
; SLM-NEXT: [[R00:%.*]] = insertelement <2 x double> undef, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <2 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: ret <2 x double> [[R01]]
;
; AVX-LABEL: @test_v2f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; AVX-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <2 x double> [[TMP3]]
; CHECK-LABEL: @test_v2f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <2 x double> [[TMP3]]
;
%a0 = extractelement <2 x double> %a, i32 0
%a1 = extractelement <2 x double> %a, i32 1
Expand Down Expand Up @@ -169,27 +152,18 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R03]]
; SSE-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x double> [[R032]]
;
; SLM-LABEL: @test_v4f64(
; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1
; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2
; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3
; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]]
; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]]
; SLM-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]]
; SLM-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]]
; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i32 0
; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i32 1
; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i32 2
; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i32 3
; SLM-NEXT: ret <4 x double> [[R03]]
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: ret <4 x double> [[R032]]
;
; AVX-LABEL: @test_v4f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
Expand Down Expand Up @@ -230,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
; SLM-NEXT: [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R07]]
; SLM-NEXT: [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x float> [[R072]]
;
; AVX-LABEL: @test_v8f32(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
Expand Down Expand Up @@ -350,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; SSE-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[RV15:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV15]]
; SSE-NEXT: [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <16 x i16> [[RV152]]
;
; SLM-LABEL: @test_v16i16(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,7 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
; CHECK-LABEL: @simple_select(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
; CHECK-NEXT: ret <4 x float> [[RD]]
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
Expand Down Expand Up @@ -131,15 +123,7 @@ define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float
; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[SHUFFLE]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[SHUFFLE1]], <4 x float> [[SHUFFLE2]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
; CHECK-NEXT: ret <4 x float> [[RD]]
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
Expand Down Expand Up @@ -176,16 +160,8 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
; CHECK-LABEL: @simple_select_users(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
; CHECK-NEXT: call void @v4f32_user(<4 x float> [[RD]]) [[ATTR0:#.*]]
; CHECK-NEXT: ret <4 x float> [[RD]]
; CHECK-NEXT: call void @v4f32_user(<4 x float> [[TMP2]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
Expand Down Expand Up @@ -246,15 +222,11 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP18]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i32 0
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> poison, float [[TMP19]], i32 2
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i32 1
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP20]], i32 3
; CHECK-NEXT: ret <4 x float> [[RD]]
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[RB2:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP17]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x float> [[RD1]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
Expand Down Expand Up @@ -312,11 +284,7 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %
; CHECK-LABEL: @simple_select_v2(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <2 x float> [[RA]], float [[TMP4]], i32 1
; CHECK-NEXT: ret <2 x float> [[RB]]
; CHECK-NEXT: ret <2 x float> [[TMP2]]
;
%c0 = extractelement <2 x i32> %c, i32 0
%c1 = extractelement <2 x i32> %c, i32 1
Expand Down Expand Up @@ -384,15 +352,7 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @reschedule_extract(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
; CHECK-NEXT: ret <4 x float> [[V3]]
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
Expand All @@ -418,15 +378,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @take_credit(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
; CHECK-NEXT: ret <4 x float> [[V3]]
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
Expand Down Expand Up @@ -456,15 +408,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 3.000000e+00, double 2.000000e+00, double 1.000000e+00, double 0.000000e+00>
; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP6]], i32 3
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> poison, double [[TMP7]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP6]], i32 2
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP8]], i32 2
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP6]], i32 1
; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP9]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x double> [[TMP6]], i32 0
; CHECK-NEXT: [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP10]], i32 0
; CHECK-NEXT: ret <4 x double> [[I4]]
; CHECK-NEXT: ret <4 x double> [[TMP6]]
;
%t0 = fadd double %w , 0.000000e+00
%t1 = fadd double %x , 1.000000e+00
Expand All @@ -484,23 +428,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
; CHECK-LABEL: @_vadd256(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP4]], i32 2
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP5]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
; CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP6]], i32 4
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5
; CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP7]], i32 5
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6
; CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP8]], i32 6
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7
; CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP9]], i32 7
; CHECK-NEXT: ret <8 x float> [[VECINIT7_I]]
; CHECK-NEXT: ret <8 x float> [[TMP1]]
;
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %b, i32 0
Expand Down
Loading