@@ -1562,7 +1562,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],mem[1,3],ymm0[4,4],mem[5,7]
; AVX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpaddb 16(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
@@ -3439,7 +3439,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; AVX-NEXT: vmovaps 48(%rdi), %xmm0
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
; AVX-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[1,3],ymm1[4,4],ymm0[5,7]
; AVX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = mem[0,1,0,1]
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpaddb 16(%rsi), %xmm2, %xmm2
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -1275,7 +1275,7 @@ define <4 x double> @test_mm256_permute_pd(<4 x double> %a0) nounwind {
define <4 x float> @test_mm_permute_ps(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_permute_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: ret{{[l|q]}}
%res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %res
@@ -1284,7 +1284,7 @@ define <4 x float> @test_mm_permute_ps(<4 x float> %a0) nounwind {
define <4 x float> @test2_mm_permute_ps(<4 x float> %a0) nounwind {
; CHECK-LABEL: test2_mm_permute_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,2,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,1,2,3]
; CHECK-NEXT: ret{{[l|q]}}
%res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 2, i32 1, i32 2, i32 3>
ret <4 x float> %res
@@ -1293,7 +1293,7 @@ define <4 x float> @test2_mm_permute_ps(<4 x float> %a0) nounwind {
define <8 x float> @test_mm256_permute_ps(<8 x float> %a0) nounwind {
; CHECK-LABEL: test_mm256_permute_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; CHECK-NEXT: ret{{[l|q]}}
%res = shufflevector <8 x float> %a0, <8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %res
@@ -1934,7 +1934,7 @@ define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind {
; X86-LABEL: test_mm256_set1_epi32:
; X86: # %bb.0:
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X86-NEXT: retl
;
@@ -2002,13 +2002,13 @@ define <8 x float> @test_mm256_set1_ps(float %a0) nounwind {
; X86-LABEL: test_mm256_set1_ps:
; X86: # %bb.0:
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_set1_ps:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%res0 = insertelement <8 x float> undef, float %a0, i32 0
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -879,13 +879,13 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea
define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
; AVX-LABEL: test_x86_avx_vpermil_ps:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps $7, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
; AVX-NEXT: vshufps $7, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x07]
; AVX-NEXT: # xmm0 = xmm0[3,1,0,0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_ps:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps $7, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
; AVX512VL-NEXT: vshufps $7, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0x07]
; AVX512VL-NEXT: # xmm0 = xmm0[3,1,0,0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
@@ -897,13 +897,13 @@ declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_vpermil_ps_256:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
; AVX-NEXT: vshufps $7, %ymm0, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0xc6,0xc0,0x07]
; AVX-NEXT: # ymm0 = ymm0[3,1,0,0,7,5,4,4]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_ps_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
; AVX512VL-NEXT: vshufps $7, %ymm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xc0,0x07]
; AVX512VL-NEXT: # ymm0 = ymm0[3,1,0,0,7,5,4,4]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx-splat.ll
@@ -145,7 +145,7 @@ define <8 x float> @funcF(i32 %val) nounwind {
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
@@ -157,7 +157,7 @@ define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT: ret{{[l|q]}}
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -624,12 +624,12 @@ entry:
define <4 x i32> @H(<4 x i32> %a) {
; X86-LABEL: H:
; X86: ## %bb.0: ## %entry
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: retl
;
; X64-LABEL: H:
; X64: ## %bb.0: ## %entry
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: retq
entry:
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -982,7 +982,7 @@ define <8 x i16> @broadcast_x86_mmx(x86_mmx %tmp) nounwind {
; X64: ## %bb.0: ## %bb
; X64-NEXT: movdq2q %xmm0, %mm0
; X64-NEXT: movq2dq %mm0, %xmm0
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT: retq
bb:
%tmp1 = bitcast x86_mmx %tmp to i64
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx-vinsertf128.ll
@@ -29,7 +29,7 @@ define void @insert_crash() nounwind {
; CHECK-NEXT: vminpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vminsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vmovups %xmm0, (%rax)
; CHECK-NEXT: retq
allocas:
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx-vperm2x128.ll
@@ -337,7 +337,7 @@ define <8 x float> @shuffle_v8f32_uu67ucuf(<8 x float> %a, <8 x float> %b) nounw
; ALL-LABEL: shuffle_v8f32_uu67ucuf:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
; ALL-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -2015,7 +2015,7 @@ declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
define <4 x i64> @test_mm256_shuffle_epi32(<4 x i64> %a0) {
; CHECK-LABEL: test_mm256_shuffle_epi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
; CHECK-NEXT: ret{{[l|q]}}
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
%shuf = shufflevector <8 x i32> %arg0, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -211,7 +211,7 @@ define <4 x i64> @f64to4sl(<4 x double> %a) {
define <4 x i64> @f32to4sl(<4 x float> %a) {
; NODQ-LABEL: f32to4sl:
; NODQ: # %bb.0:
; NODQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; NODQ-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; NODQ-NEXT: vcvttss2si %xmm1, %rax
; NODQ-NEXT: vmovq %rax, %xmm1
; NODQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
@@ -1707,7 +1707,7 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
; NOVL: # %bb.0:
; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOVL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0
; NOVL-NEXT: retq
;
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -987,7 +987,7 @@ define <8 x double> @test_mm512_maskz_permute_pd(i8 %a0, <8 x double> %a1) {
define <16 x float> @test_mm512_permute_ps(<16 x float> %a0) {
; CHECK-LABEL: test_mm512_permute_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; CHECK-NEXT: ret{{[l|q]}}
%res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
ret <16 x float> %res
@@ -1128,7 +1128,7 @@ define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) {
define <8 x i64> @test_mm512_shuffle_epi32(<8 x i64> %a0) {
; CHECK-LABEL: test_mm512_shuffle_epi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; CHECK-NEXT: ret{{[l|q]}}
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -1022,7 +1022,7 @@ declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16
define <16 x float>@test_int_x86_avx512_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xc0,0x16]
; CHECK-NEXT: vshufps $22, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xc0,0x16]
; CHECK-NEXT: ## zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
@@ -1072,7 +1072,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>
define <16 x i32>@test_int_x86_avx512_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpermilps $3, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xc0,0x03]
; CHECK-NEXT: vshufps $3, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xc0,0x03]
; CHECK-NEXT: ## zmm0 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll
@@ -6,7 +6,7 @@
define <4 x float> @test_4xfloat_perm_mask0(<4 x float> %vec) {
; CHECK-LABEL: test_4xfloat_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,1]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,1,3,1]
; CHECK-NEXT: retq
%res = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 1>
ret <4 x float> %res
@@ -92,7 +92,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask2(<4 x float> %vec, <4 x floa
define <4 x float> @test_4xfloat_perm_mask3(<4 x float> %vec) {
; CHECK-LABEL: test_4xfloat_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,2]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,2,3,2]
; CHECK-NEXT: retq
%res = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2>
ret <4 x float> %res
@@ -342,7 +342,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x floa
define <8 x float> @test_8xfloat_perm_imm_mask3(<8 x float> %vec) {
; CHECK-LABEL: test_8xfloat_perm_imm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,1,0,6,6,5,4]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,2,1,0,6,6,5,4]
; CHECK-NEXT: retq
%res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 2, i32 1, i32 0, i32 6, i32 6, i32 5, i32 4>
ret <8 x float> %res
@@ -835,7 +835,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x
define <16 x float> @test_16xfloat_perm_imm_mask3(<16 x float> %vec) {
; CHECK-LABEL: test_16xfloat_perm_imm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
; CHECK-NEXT: retq
%res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 0, i32 2, i32 5, i32 5, i32 4, i32 6, i32 9, i32 9, i32 8, i32 10, i32 13, i32 13, i32 12, i32 14>
ret <16 x float> %res
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/avx512-shuffles/shuffle.ll
@@ -2091,7 +2091,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(ptr %vp, <32 x i16> %
define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) {
; CHECK-LABEL: test_4xi32_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,3,0]
; CHECK-NEXT: retq
%res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
ret <4 x i32> %res
@@ -2171,7 +2171,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask
define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) {
; CHECK-LABEL: test_4xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,0,3]
; CHECK-NEXT: retq
%res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
ret <4 x i32> %res
@@ -2325,7 +2325,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(ptr %vp, <4 x i32> %mask) {
define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) {
; CHECK-LABEL: test_8xi32_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4]
; CHECK-NEXT: retq
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
ret <8 x i32> %res
@@ -2405,7 +2405,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask
define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) {
; CHECK-LABEL: test_8xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4]
; CHECK-NEXT: retq
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4>
ret <8 x i32> %res
@@ -2559,7 +2559,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(ptr %vp, <8 x i32> %mask) {
define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_perm_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
; CHECK-NEXT: retq
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
ret <16 x i32> %res
@@ -2639,7 +2639,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %
define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
; CHECK-NEXT: retq
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
ret <16 x i32> %res
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -270,7 +270,7 @@ define void @trunc_qd_256_mem(<4 x i64> %i, ptr %res) #0 {
define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
; ALL-LABEL: trunc_qd_128:
; ALL: ## %bb.0:
; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; ALL-NEXT: retq
%x = trunc <2 x i64> %i to <2 x i32>
ret <2 x i32> %x
@@ -279,7 +279,7 @@ define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
define void @trunc_qd_128_mem(<2 x i64> %i, ptr %res) #0 {
; KNL-LABEL: trunc_qd_128_mem:
; KNL: ## %bb.0:
; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL-NEXT: vmovlps %xmm0, (%rdi)
; KNL-NEXT: retq
;
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1211,7 +1211,7 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
; AVX512-LABEL: test46:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX512-NEXT: vpermilps $212, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4]
; AVX512-NEXT: vshufps $212, %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc6,0xc0,0xd4]
; AVX512-NEXT: ## xmm0 = xmm0[0,1,1,3]
; AVX512-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -1334,7 +1334,7 @@ define half @extract_f16_5(<8 x half> %x) {
define half @extract_f16_6(<8 x half> %x) {
; CHECK-LABEL: extract_f16_6:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: ret{{[l|q]}}
%res = extractelement <8 x half> %x, i32 6
ret half %res
@@ -1606,14 +1606,14 @@ define void @extract_store_f16_5(<8 x half> %x, ptr %y) {
define void @extract_store_f16_6(<8 x half> %x, ptr %y) {
; X64-LABEL: extract_store_f16_6:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; X64-NEXT: vmovsh %xmm0, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: extract_store_f16_6:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; X86-NEXT: vmovsh %xmm0, (%eax)
; X86-NEXT: retl
%res = extractelement <8 x half> %x, i32 6
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512fp16-mscatter.ll
@@ -31,7 +31,7 @@ define void @test_mscatter_v16f16(ptr %base, <16 x i32> %index, <16 x half> %val
; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpextrq $1, %xmm4, %rax
; CHECK-NEXT: vmovsh %xmm3, (%rax)
; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[3,3,3,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
; CHECK-NEXT: vextracti32x4 $3, %zmm0, %xmm0
; CHECK-NEXT: vmovq %xmm0, %rax
; CHECK-NEXT: vmovsh %xmm3, (%rax)
@@ -58,7 +58,7 @@ define void @test_mscatter_v16f16(ptr %base, <16 x i32> %index, <16 x half> %val
; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpextrq $1, %xmm3, %rax
; CHECK-NEXT: vmovsh %xmm1, (%rax)
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; CHECK-NEXT: vextracti32x4 $3, %zmm2, %xmm2
; CHECK-NEXT: vmovq %xmm2, %rax
; CHECK-NEXT: vmovsh %xmm1, (%rax)
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -891,7 +891,7 @@ declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x
define <8 x float>@test_int_x86_avx512_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x16]
; CHECK-NEXT: vshufps $22, %ymm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xc0,0x16]
; CHECK-NEXT: # ymm0 = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
@@ -943,7 +943,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x
define <4 x float>@test_int_x86_avx512_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x16]
; CHECK-NEXT: vshufps $22, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0x16]
; CHECK-NEXT: # xmm0 = xmm0[2,1,1,0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
@@ -1967,7 +1967,7 @@ declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i
define <4 x i32>@test_int_x86_avx512_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_d_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x03]
; CHECK-NEXT: vshufps $3, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0x03]
; CHECK-NEXT: # xmm0 = xmm0[3,0,0,0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
@@ -2019,7 +2019,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i
define <8 x i32>@test_int_x86_avx512_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x03]
; CHECK-NEXT: vshufps $3, %ymm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xc0,0x03]
; CHECK-NEXT: # ymm0 = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -352,7 +352,7 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
@@ -617,7 +617,7 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -627,7 +627,7 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -451,7 +451,7 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
@@ -806,7 +806,7 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -821,7 +821,7 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
@@ -246,7 +246,7 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/buildvec-extract.ll
@@ -83,7 +83,7 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
;
; AVX-LABEL: extract1_i32_zext_insert0_i64_zero:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
@@ -128,7 +128,7 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
;
; AVX-LABEL: extract2_i32_zext_insert0_i64_zero:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
@@ -187,7 +187,7 @@ define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
;
; AVX-LABEL: extract0_i32_zext_insert1_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX-NEXT: retq
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/combine-and.ll
@@ -624,7 +624,7 @@ define <8 x i64> @neg_scalar_broadcast_v8i64(i64 %a0, <2 x i64> %a1) {
; AVX1-LABEL: neg_scalar_broadcast_v8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,3]
@@ -790,7 +790,7 @@ define <2 x i64> @casted_neg_scalar_broadcast_v2i64(<2 x i32> %a0, <2 x i64> %a1
;
; AVX1-LABEL: casted_neg_scalar_broadcast_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/combine-concatvectors.ll
@@ -91,7 +91,7 @@ define <4 x float> @concat_of_broadcast_v4f32_v8f32(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-NEXT: vmovaps (%rdi), %ymm0
; AVX1-NEXT: vmovaps (%rsi), %ymm1
; AVX1-NEXT: vmovaps (%rdx), %ymm2
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4],ymm2[5,6],ymm0[7]
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
@@ -47,7 +47,7 @@ define <2 x i64> @vec128_eltty_i64_source_subvec_0_target_subvec_mask_1_binary(<
define <4 x float> @vec128_eltty_float_source_subvec_0_target_subvec_mask_1_unary(<4 x float> %x) nounwind {
; CHECK-LABEL: vec128_eltty_float_source_subvec_0_target_subvec_mask_1_unary:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,2,0]
; CHECK-NEXT: retq
%r = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
ret <4 x float> %r
@@ -65,7 +65,7 @@ define <4 x float> @vec128_eltty_float_source_subvec_0_target_subvec_mask_1_bina
define <4 x i32> @vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_unary(<4 x i32> %x) nounwind {
; CHECK-LABEL: vec128_eltty_i32_source_subvec_0_target_subvec_mask_1_unary:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,0]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,2,0]
; CHECK-NEXT: retq
%r = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
ret <4 x i32> %r
@@ -322,7 +322,7 @@ define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_unary(<4
define <4 x i64> @vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: vec256_eltty_i64_source_subvec_1_target_subvec_mask_2_binary:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
; CHECK-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; CHECK-NEXT: retq
%r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
@@ -448,7 +448,7 @@ define <8 x float> @vec256_eltty_float_source_subvec_1_target_subvec_mask_2_bina
;
; CHECK-FAST-LABEL: vec256_eltty_float_source_subvec_1_target_subvec_mask_2_binary:
; CHECK-FAST: # %bb.0:
; CHECK-FAST-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
; CHECK-FAST-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
; CHECK-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
; CHECK-FAST-NEXT: retq
%r = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
@@ -577,7 +577,7 @@ define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary(<
;
; CHECK-FAST-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_2_binary:
; CHECK-FAST: # %bb.0:
; CHECK-FAST-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
; CHECK-FAST-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
; CHECK-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
; CHECK-FAST-NEXT: retq
%r = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
@@ -598,7 +598,7 @@ define <8 x i32> @vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary(<
; CHECK-SLOW-LABEL: vec256_eltty_i32_source_subvec_1_target_subvec_mask_3_binary:
; CHECK-SLOW: # %bb.0:
; CHECK-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,2,3]
; CHECK-SLOW-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
; CHECK-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
; CHECK-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
; CHECK-SLOW-NEXT: retq
;
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/extract-concat.ll
@@ -68,9 +68,9 @@ define <16 x i64> @catcat(<4 x i64> %x) {
;
; AVX1-LABEL: catcat:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm4
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: vmovddup {{.*#+}} ymm2 = ymm0[0,0,2,2]
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/extract-store.ll
@@ -326,7 +326,7 @@ define void @extract_i64_1(ptr nocapture %dst, <2 x i64> %foo) nounwind {
; AVX-X86-LABEL: extract_i64_1:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-X86-NEXT: vmovlps %xmm0, (%eax)
; AVX-X86-NEXT: retl
;
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/fdiv-combine-vec.ll
@@ -61,7 +61,7 @@ define <4 x float> @splat_fdiv_v4f32(<4 x float> %x, float %y) {
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vdivss %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%vy = insertelement <4 x float> undef, float %y, i32 0
@@ -84,7 +84,7 @@ define <8 x float> @splat_fdiv_v8f32(<8 x float> %x, float %y) {
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vdivss %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
@@ -115,7 +115,7 @@ define <4 x float> @splat_fdiv_v4f32_estimate(<4 x float> %x, float %y) #0 {
; AVX-NEXT: vsubss %xmm1, %xmm3, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%vy = insertelement <4 x float> undef, float %y, i32 0
@@ -146,7 +146,7 @@ define <8 x float> @splat_fdiv_v8f32_estimate(<8 x float> %x, float %y) #0 {
; AVX-NEXT: vsubss %xmm1, %xmm3, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/X86/fmaddsub-combine.ll
@@ -490,8 +490,8 @@ define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D,
; NOFMA-NEXT: vsubss %xmm5, %xmm4, %xmm4
; NOFMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2,3]
; NOFMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; NOFMA-NEXT: vsubss %xmm2, %xmm0, %xmm0
; NOFMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NOFMA-NEXT: retq
@@ -578,17 +578,17 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
; NOFMA-NEXT: vsubss %xmm9, %xmm8, %xmm8
; NOFMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[2,3]
; NOFMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; NOFMA-NEXT: vsubss %xmm2, %xmm0, %xmm0
; NOFMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NOFMA-NEXT: vmovshdup {{.*#+}} xmm1 = xmm4[1,1,3,3]
; NOFMA-NEXT: vmovshdup {{.*#+}} xmm2 = xmm5[1,1,3,3]
; NOFMA-NEXT: vsubss %xmm2, %xmm1, %xmm1
; NOFMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm6[0],xmm1[0],xmm6[2,3]
; NOFMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm7[0],xmm1[3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm2 = xmm4[3,3,3,3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm3 = xmm5[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm2 = xmm4[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm3 = xmm5[3,3,3,3]
; NOFMA-NEXT: vsubss %xmm3, %xmm2, %xmm2
; NOFMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NOFMA-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -719,29 +719,29 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
; NOFMA-NEXT: vsubss %xmm15, %xmm14, %xmm14
; NOFMA-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[2,3]
; NOFMA-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm3 = xmm4[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm3 = xmm4[3,3,3,3]
; NOFMA-NEXT: vsubss %xmm3, %xmm0, %xmm0
; NOFMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm2 = xmm6[3,3,3,3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm3 = xmm7[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm2 = xmm6[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm3 = xmm7[3,3,3,3]
; NOFMA-NEXT: vsubss %xmm3, %xmm2, %xmm2
; NOFMA-NEXT: vinsertps {{.*#+}} xmm2 = xmm8[0,1,2],xmm2[0]
; NOFMA-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; NOFMA-NEXT: vmovshdup {{.*#+}} xmm4 = xmm5[1,1,3,3]
; NOFMA-NEXT: vsubss %xmm4, %xmm3, %xmm3
; NOFMA-NEXT: vinsertps {{.*#+}} xmm3 = xmm9[0],xmm3[0],xmm9[2,3]
; NOFMA-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm10[0],xmm3[3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm4 = xmm5[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm4 = xmm5[3,3,3,3]
; NOFMA-NEXT: vsubss %xmm4, %xmm1, %xmm1
; NOFMA-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; NOFMA-NEXT: vmovshdup {{.*#+}} xmm3 = xmm11[1,1,3,3]
; NOFMA-NEXT: vmovshdup {{.*#+}} xmm4 = xmm13[1,1,3,3]
; NOFMA-NEXT: vsubss %xmm4, %xmm3, %xmm3
; NOFMA-NEXT: vshufps {{.*#+}} xmm3 = xmm3[0,0],xmm12[0,0]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm4 = xmm11[3,3,3,3]
; NOFMA-NEXT: vpermilps {{.*#+}} xmm5 = xmm13[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm4 = xmm11[3,3,3,3]
; NOFMA-NEXT: vshufps {{.*#+}} xmm5 = xmm13[3,3,3,3]
; NOFMA-NEXT: vsubss %xmm5, %xmm4, %xmm4
; NOFMA-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
; NOFMA-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/haddsub-2.ll
@@ -907,12 +907,12 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
; AVX-LABEL: not_a_hsub_2:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX-NEXT: vshufps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vsubss %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/haddsub-4.ll
@@ -66,13 +66,13 @@ define <8 x float> @hadd_reverse_v8f32(<8 x float> %a0, <8 x float> %a1) {
; AVX1: # %bb.0:
; AVX1-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT: retq
;
; AVX2-LABEL: hadd_reverse_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: retq
%lhs = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 7, i32 5, i32 15, i32 13, i32 3, i32 1, i32 11, i32 9>
@@ -96,17 +96,17 @@ define <8 x float> @hadd_reverse2_v8f32(<8 x float> %a0, <8 x float> %a1) {
; AVX1-LABEL: hadd_reverse2_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
; AVX1-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: hadd_reverse2_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
; AVX2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,0,1]
; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -133,13 +133,13 @@ define <8 x float> @hadd_reverse3_v8f32(<8 x float> %a0, <8 x float> %a1) {
; AVX1: # %bb.0:
; AVX1-NEXT: vhaddps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT: retq
;
; AVX2-LABEL: hadd_reverse3_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vhaddps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: retq
%shuf0 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -337,17 +337,17 @@ define <16 x float> @hadd_reverse_v16f32(<16 x float> %a0, <16 x float> %a1) nou
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm3[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm2[1,0,3,2,5,4,7,6]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[1,0,3,2,5,4,7,6]
; AVX1-NEXT: retq
;
; AVX2-LABEL: hadd_reverse_v16f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vhaddps %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm1[2,0,3,1]
; AVX2-NEXT: vhaddps %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,0,3,1]
; AVX2-NEXT: vmovaps %ymm3, %ymm0
; AVX2-NEXT: retq
@@ -380,20 +380,20 @@ define <16 x float> @hadd_reverse2_v16f32(<16 x float> %a0, <16 x float> %a1) no
; AVX1: # %bb.0:
; AVX1-NEXT: vhaddps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} ymm3 = ymm1[1,0,3,2,5,4,7,6]
; AVX1-NEXT: vshufps {{.*#+}} ymm3 = ymm1[1,0,3,2,5,4,7,6]
; AVX1-NEXT: vhaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT: vmovaps %ymm3, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: hadd_reverse2_v16f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vhaddps %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm1[2,3,0,1]
; AVX2-NEXT: vhaddps %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vmovaps %ymm3, %ymm0
; AVX2-NEXT: retq
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/X86/haddsub-undef.ll
@@ -208,15 +208,15 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm0, %xmm2, %xmm0
; AVX-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-SLOW-NEXT: retq
;
; AVX-FAST-LABEL: test8_undef:
; AVX-FAST: # %bb.0:
; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX-FAST-NEXT: retq
%vecext = extractelement <4 x float> %a, i32 0
%vecext1 = extractelement <4 x float> %a, i32 1
@@ -377,15 +377,15 @@ define <16 x float> @test13_v16f32_undef(<16 x float> %a, <16 x float> %b) {
; AVX512-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX512-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-SLOW-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX512-SLOW-NEXT: vshufps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX512-SLOW-NEXT: vaddss %xmm3, %xmm2, %xmm2
; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX512-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-SLOW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512-SLOW-NEXT: vaddss %xmm2, %xmm0, %xmm2
; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512-SLOW-NEXT: vaddss %xmm0, %xmm2, %xmm0
; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512-SLOW-NEXT: retq
@@ -520,15 +520,15 @@ define <4 x float> @add_ps_030(<4 x float> %x) {
;
; AVX-SLOW-LABEL: add_ps_030:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,2,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0,2,3]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: retq
;
; AVX-FAST-LABEL: add_ps_030:
; AVX-FAST: # %bb.0:
; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX-FAST-NEXT: retq
%l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 2>
%r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 3>
@@ -591,7 +591,7 @@ define <4 x float> @add_ps_016(<4 x float> %0, <4 x float> %1) {
; AVX-LABEL: add_ps_016:
; AVX: # %bb.0:
; AVX-NEXT: vhaddps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,3,3]
; AVX-NEXT: retq
%3 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> <i32 0, i32 6>
%4 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> <i32 1, i32 7>
@@ -620,8 +620,8 @@ define <4 x float> @add_ps_017(<4 x float> %x) {
;
; AVX-SLOW-LABEL: add_ps_017:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: retq
;
@@ -943,8 +943,8 @@ define <4 x float> @PR45747_1(<4 x float> %a, <4 x float> %b) nounwind {
;
; AVX-SLOW-LABEL: PR45747_1:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,2,2,2]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,2,2]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: retq
;
@@ -976,7 +976,7 @@ define <4 x float> @PR45747_2(<4 x float> %a, <4 x float> %b) nounwind {
; AVX-SLOW-LABEL: PR45747_2:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,1,1]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,1,1]
; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: retq
;
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/X86/haddsub.ll
@@ -502,7 +502,7 @@ define float @extract_extract23_v4f32_fadd_f32(<4 x float> %x) {
; AVX-SLOW-LABEL: extract_extract23_v4f32_fadd_f32:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: retq
;
@@ -563,7 +563,7 @@ define float @extract_extract23_v4f32_fadd_f32_commute(<4 x float> %x) {
; AVX-SLOW-LABEL: extract_extract23_v4f32_fadd_f32_commute:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: retq
;
@@ -683,7 +683,7 @@ define float @extract_extract23_v4f32_fsub_f32(<4 x float> %x) {
; AVX-SLOW-LABEL: extract_extract23_v4f32_fsub_f32:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: retq
;
@@ -729,7 +729,7 @@ define float @extract_extract23_v4f32_fsub_f32_commute(<4 x float> %x) {
; AVX-LABEL: extract_extract23_v4f32_fsub_f32_commute:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%x0 = extractelement <4 x float> %x, i32 2
@@ -837,7 +837,7 @@ define float @extract_extract23_v8f32_fadd_f32(<8 x float> %x) {
; AVX-SLOW-LABEL: extract_extract23_v8f32_fadd_f32:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: vzeroupper
; AVX-SLOW-NEXT: retq
@@ -873,7 +873,7 @@ define float @extract_extract67_v8f32_fadd_f32(<8 x float> %x) {
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: vzeroupper
; AVX-SLOW-NEXT: retq
@@ -939,7 +939,7 @@ define float @extract_extract23_v8f32_fadd_f32_commute(<8 x float> %x) {
; AVX-SLOW-LABEL: extract_extract23_v8f32_fadd_f32_commute:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vzeroupper
; AVX-SLOW-NEXT: retq
@@ -975,7 +975,7 @@ define float @extract_extract67_v8f32_fadd_f32_commute(<8 x float> %x) {
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-SLOW-NEXT: vzeroupper
; AVX-SLOW-NEXT: retq
@@ -1172,7 +1172,7 @@ define float @extract_extract23_v8f32_fsub_f32(<8 x float> %x) {
; AVX-SLOW-LABEL: extract_extract23_v8f32_fsub_f32:
; AVX-SLOW: # %bb.0:
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-SLOW-NEXT: vzeroupper
; AVX-SLOW-NEXT: retq
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -457,7 +457,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
@@ -470,7 +470,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
@@ -531,7 +531,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
@@ -543,7 +543,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
@@ -1122,7 +1122,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
@@ -1137,7 +1137,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
@@ -1240,7 +1240,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
Expand All @@ -1254,7 +1254,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -460,7 +460,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
@@ -473,7 +473,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
@@ -535,7 +535,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
@@ -547,7 +547,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
@@ -1126,7 +1126,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
@@ -1141,7 +1141,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
@@ -1244,7 +1244,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
@@ -1258,7 +1258,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -52,7 +52,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
;
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
@@ -531,7 +531,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -549,7 +549,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -621,7 +621,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -638,7 +638,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -1236,7 +1236,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm3, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -1258,7 +1258,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -1381,7 +1381,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm3, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm0, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -1402,7 +1402,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -53,7 +53,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
;
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
@@ -475,7 +475,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vxorps %xmm1, %xmm3, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
@@ -493,7 +493,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -567,7 +567,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vpxor %xmm1, %xmm3, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
@@ -584,7 +584,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -1152,7 +1152,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm3, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -1174,7 +1174,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -1299,7 +1299,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm2, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -1320,7 +1320,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/horizontal-shuffle-2.ll
@@ -14,7 +14,7 @@ define <4 x float> @test_unpacklo_hadd_v4f32(<4 x float> %0, <4 x float> %1, <4
; AVX-LABEL: test_unpacklo_hadd_v4f32:
; AVX: ## %bb.0:
; AVX-NEXT: vhaddps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: ret{{[l|q]}}
%5 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %0, <4 x float> %1) #4
%6 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %2, <4 x float> %3) #4
@@ -33,7 +33,7 @@ define <4 x float> @test_unpackhi_hadd_v4f32(<4 x float> %0, <4 x float> %1, <4
; AVX-LABEL: test_unpackhi_hadd_v4f32:
; AVX: ## %bb.0:
; AVX-NEXT: vhaddps %xmm3, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: ret{{[l|q]}}
%5 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %0, <4 x float> %1) #4
%6 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %2, <4 x float> %3) #4
@@ -51,7 +51,7 @@ define <4 x float> @test_unpacklo_hsub_v4f32(<4 x float> %0, <4 x float> %1, <4
; AVX-LABEL: test_unpacklo_hsub_v4f32:
; AVX: ## %bb.0:
; AVX-NEXT: vhsubps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: ret{{[l|q]}}
%5 = tail call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %0, <4 x float> %1) #4
%6 = tail call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %2, <4 x float> %3) #4
@@ -70,7 +70,7 @@ define <4 x float> @test_unpackhi_hsub_v4f32(<4 x float> %0, <4 x float> %1, <4
; AVX-LABEL: test_unpackhi_hsub_v4f32:
; AVX: ## %bb.0:
; AVX-NEXT: vhsubps %xmm3, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: ret{{[l|q]}}
%5 = tail call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %0, <4 x float> %1) #4
%6 = tail call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %2, <4 x float> %3) #4
@@ -164,7 +164,7 @@ define <4 x float> @test_unpacklo_hadd_v4f32_unary(<4 x float> %0) {
; AVX-LABEL: test_unpacklo_hadd_v4f32_unary:
; AVX: ## %bb.0:
; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT: ret{{[l|q]}}
%2 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %0, <4 x float> %0) #4
%3 = shufflevector <4 x float> %2, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/horizontal-shuffle-3.ll
@@ -6,7 +6,7 @@ define <8 x float> @test_unpacklo_hadd_v8f32(<8 x float> %0, <8 x float> %1, <8
; CHECK-LABEL: test_unpacklo_hadd_v8f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vhaddps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; CHECK-NEXT: ret{{[l|q]}}
%5 = tail call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %0, <8 x float> %1) #4
%6 = tail call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %2, <8 x float> %3) #4
@@ -18,7 +18,7 @@ define <8 x float> @test_unpackhi_hadd_v8f32(<8 x float> %0, <8 x float> %1, <8
; CHECK-LABEL: test_unpackhi_hadd_v8f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vhaddps %ymm3, %ymm1, %ymm0
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; CHECK-NEXT: ret{{[l|q]}}
%5 = tail call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %0, <8 x float> %1) #4
%6 = tail call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %2, <8 x float> %3) #4
@@ -30,7 +30,7 @@ define <8 x float> @test_unpacklo_hsub_v8f32(<8 x float> %0, <8 x float> %1, <8
; CHECK-LABEL: test_unpacklo_hsub_v8f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vhsubps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; CHECK-NEXT: ret{{[l|q]}}
%5 = tail call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %0, <8 x float> %1) #4
%6 = tail call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %2, <8 x float> %3) #4
@@ -42,7 +42,7 @@ define <8 x float> @test_unpackhi_hsub_v8f32(<8 x float> %0, <8 x float> %1, <8
; CHECK-LABEL: test_unpackhi_hsub_v8f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vhsubps %ymm3, %ymm1, %ymm0
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; CHECK-NEXT: ret{{[l|q]}}
%5 = tail call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %0, <8 x float> %1) #4
%6 = tail call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %2, <8 x float> %3) #4
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/horizontal-shuffle-4.ll
@@ -61,7 +61,7 @@ define <8 x float> @permute_hadd_hadd_256(<8 x float> %a0, <8 x float> %a1, <8 x
; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vhaddps %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
%2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a2, <8 x float> %a3)
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/horizontal-sum.ll
@@ -218,8 +218,8 @@ define <8 x float> @pair_sum_v8f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
; AVX1-SLOW-LABEL: pair_sum_v8f32_v4f32:
; AVX1-SLOW: # %bb.0:
; AVX1-SLOW-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3]
; AVX1-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,3,1,3]
; AVX1-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-SLOW-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vhaddps %xmm4, %xmm4, %xmm1
; AVX1-SLOW-NEXT: vhaddps %xmm5, %xmm5, %xmm4
@@ -262,8 +262,8 @@ define <8 x float> @pair_sum_v8f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
; AVX2-SLOW-LABEL: pair_sum_v8f32_v4f32:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3]
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,3,1,3]
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-SLOW-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vhaddps %xmm4, %xmm4, %xmm1
; AVX2-SLOW-NEXT: vhaddps %xmm5, %xmm5, %xmm4
@@ -893,27 +893,27 @@ define <4 x float> @reduction_sum_v4f32_v4f32(<4 x float> %0, <4 x float> %1, <4
; AVX-SLOW-NEXT: vaddss %xmm4, %xmm0, %xmm4
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0]
; AVX-SLOW-NEXT: vaddss %xmm5, %xmm4, %xmm4
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm0, %xmm4, %xmm0
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-SLOW-NEXT: vaddss %xmm4, %xmm1, %xmm4
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
; AVX-SLOW-NEXT: vaddss %xmm5, %xmm4, %xmm4
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm1, %xmm4, %xmm1
; AVX-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; AVX-SLOW-NEXT: vaddss %xmm1, %xmm2, %xmm1
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
; AVX-SLOW-NEXT: vaddss %xmm4, %xmm1, %xmm1
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; AVX-SLOW-NEXT: vaddss %xmm1, %xmm3, %xmm1
; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm3[1,0]
; AVX-SLOW-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm2 = xmm3[3,3,3,3]
; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm3[3,3,3,3]
; AVX-SLOW-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX-SLOW-NEXT: retq
@@ -923,24 +923,24 @@ define <4 x float> @reduction_sum_v4f32_v4f32(<4 x float> %0, <4 x float> %1, <4
; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm4
; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0]
; AVX-FAST-NEXT: vaddss %xmm5, %xmm4, %xmm4
; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-FAST-NEXT: vaddss %xmm0, %xmm4, %xmm0
; AVX-FAST-NEXT: vhaddps %xmm1, %xmm1, %xmm4
; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
; AVX-FAST-NEXT: vaddss %xmm5, %xmm4, %xmm4
; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX-FAST-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX-FAST-NEXT: vaddss %xmm1, %xmm4, %xmm1
; AVX-FAST-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-FAST-NEXT: vhaddps %xmm2, %xmm2, %xmm1
; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
; AVX-FAST-NEXT: vaddss %xmm4, %xmm1, %xmm1
; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
; AVX-FAST-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX-FAST-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-FAST-NEXT: vhaddps %xmm3, %xmm3, %xmm1
; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm2 = xmm3[1,0]
; AVX-FAST-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm2 = xmm3[3,3,3,3]
; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm3[3,3,3,3]
; AVX-FAST-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX-FAST-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX-FAST-NEXT: retq
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/i64-to-float.ll
@@ -299,7 +299,7 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
; X86-AVX-NEXT: # xmm1 = mem[0,0]
; X86-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
@@ -359,7 +359,7 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
; X64-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; X64-AVX-NEXT: retq
;
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -187,7 +187,7 @@ define <4 x float> @arg_f32_v4f32_undef(float %x, i32 %y) nounwind {
;
; AVX1-LABEL: arg_f32_v4f32_undef:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: arg_f32_v4f32_undef:
@@ -578,7 +578,7 @@ define <8 x float> @arg_f32_v8f32_undef(float %x, i32 %y) nounwind {
;
; AVX1-LABEL: arg_f32_v8f32_undef:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -1053,7 +1053,7 @@ define <4 x float> @arg_f32_v4f32(<4 x float> %v, float %x, i32 %y) nounwind {
;
; AVX1-LABEL: arg_f32_v4f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vmovd %edi, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
@@ -1777,7 +1777,7 @@ define <8 x float> @arg_f32_v8f32(<8 x float> %v, float %x, i32 %y) nounwind {
;
; AVX1-LABEL: arg_f32_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vmovd %edi, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/known-bits-vector.ll
@@ -136,14 +136,14 @@ define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounw
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
@@ -173,15 +173,15 @@ define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X86-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -384,21 +384,21 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: knownbits_mask_concat_uitofp:
; X86: # %bb.0:
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X86-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X86-NEXT: vmovaps {{.*#+}} xmm2 = [131071,131071,131071,131071]
; X86-NEXT: vandps %xmm2, %xmm1, %xmm1
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X86-NEXT: vandps %xmm2, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT: vmovaps {{.*#+}} xmm2 = [131071,131071,131071,131071]
; X64-NEXT: vandps %xmm2, %xmm1, %xmm1
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT: vandps %xmm2, %xmm0, %xmm0
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
@@ -601,7 +601,7 @@ define <4 x float> @knownbits_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32>
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3, %xmm3
; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X86-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
@@ -613,7 +613,7 @@ define <4 x float> @knownbits_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32>
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
%1 = and <4 x i32> %a2, <i32 65535, i32 -1, i32 255, i32 -1>
@@ -637,7 +637,7 @@ define <4 x float> @knownbits_lshr_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3, %xmm3
; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X86-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
@@ -649,7 +649,7 @@ define <4 x float> @knownbits_lshr_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
%1 = lshr <4 x i32> %a2, <i32 5, i32 1, i32 5, i32 1>
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -148,7 +148,7 @@ define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_0:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
@@ -157,7 +157,7 @@ define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 32, i64 32>
@@ -170,7 +170,7 @@ define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_1:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
@@ -179,7 +179,7 @@ define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 32, i64 63>
@@ -322,7 +322,7 @@ define <2 x double> @signbits_sext_shl_sitofp(<2 x i16> %a0) nounwind {
define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = ashr <2 x i64> %a0, <i64 16, i64 16>
@@ -415,7 +415,7 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
; X86-NEXT: vpsrad $1, %xmm2, %xmm2
; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; X86-NEXT: vpermilps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X86-NEXT: vshufps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm6
; X86-NEXT: vextractf128 $1, %ymm1, %xmm1
; X86-NEXT: vextractf128 $1, %ymm0, %xmm0
@@ -443,7 +443,7 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
; X64-AVX1-NEXT: vpsrad $1, %xmm2, %xmm2
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X64-AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm6
; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
@@ -465,7 +465,7 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
; X64-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; X64-AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; X64-AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/masked_store.ll
@@ -4958,7 +4958,7 @@ define void @one_mask_bit_set6(ptr %addr, <16 x i64> %val) {
; X86-AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; X86-AVX512-NEXT: vmovlps %xmm0, 48(%eax)
; X86-AVX512-NEXT: vextractf128 $1, %ymm1, %xmm0
; X86-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X86-AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X86-AVX512-NEXT: vmovlps %xmm0, 88(%eax)
; X86-AVX512-NEXT: vzeroupper
; X86-AVX512-NEXT: retl
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/masked_store_trunc.ll
@@ -1649,7 +1649,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
; AVX1-NEXT: retq
;
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
@@ -2440,7 +2440,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
; AVX1-NEXT: retq
;
@@ -2457,7 +2457,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3
; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vpmaskmovd %xmm0, %xmm1, (%rdi)
; AVX2-NEXT: retq
;
@@ -2597,7 +2597,7 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848]
; AVX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm4
; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX-NEXT: vmovmskpd %xmm1, %eax
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
@@ -2142,7 +2142,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372041149743103,9223372041149743103]
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
; AVX1-NEXT: retq
;
@@ -2158,7 +2158,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372041149743103,9223372041149743103]
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vpmaskmovd %xmm0, %xmm1, (%rdi)
; AVX2-NEXT: retq
;
@@ -2279,7 +2279,7 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343]
; AVX-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
; AVX-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX-NEXT: vmovmskpd %xmm1, %eax