336 changes: 0 additions & 336 deletions llvm/test/CodeGen/X86/avx-shuffle.ll

This file was deleted.

11 changes: 5 additions & 6 deletions llvm/test/CodeGen/X86/avx-splat.ll
@@ -9,8 +9,7 @@ entry:
ret <32 x i8> %shuffle
}

; CHECK: vpunpckhwd %xmm
; CHECK-NEXT: vpshufd $85
; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
entry:
@@ -19,7 +18,7 @@ entry:
}

; CHECK: vmovq
; CHECK-NEXT: vmovlhps %xmm
; CHECK-NEXT: vunpcklpd %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
entry:
@@ -70,7 +69,7 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex
ret <8 x float> %load_broadcast12281250
}

; CHECK: vpshufd $0
; CHECK: vpermilps $4
; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcF(i32 %val) nounwind {
%ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
@@ -79,7 +78,7 @@ define <8 x float> @funcF(i32 %val) nounwind {
ret <8 x float> %tmp
}

; CHECK: vpshufd $0
; CHECK: vpermilps $0
; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
@@ -88,7 +87,7 @@ entry:
}

; CHECK: vextractf128 $1
; CHECK-NEXT: vpshufd
; CHECK-NEXT: vpermilps $85
; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
14 changes: 0 additions & 14 deletions llvm/test/CodeGen/X86/avx-vmovddup.ll

This file was deleted.

177 changes: 104 additions & 73 deletions llvm/test/CodeGen/X86/avx-vperm2x128.ll
@@ -2,10 +2,15 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2

define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: A:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; ALL-NEXT: retq
; AVX1-LABEL: A:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: A:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x float> %shuffle
@@ -14,48 +19,74 @@ entry:
define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: B:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
ret <8 x float> %shuffle
}

define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: C:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
; ALL-NEXT: retq
; AVX1-LABEL: C:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: C:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <8 x float> %shuffle
}

define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: D:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT: retq
; AVX1-LABEL: D:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: D:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
ret <8 x float> %shuffle
}

define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: E:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT: retq
; AVX1-LABEL: E:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: E:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT: retq
entry:
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <32 x i8> %shuffle
}

define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: E2:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; ALL-NEXT: retq
; AVX1-LABEL: E2:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: E2:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
entry:
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x i64> %shuffle
@@ -64,18 +95,15 @@ entry:
define <32 x i8> @Ei(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: Ei:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: Ei:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpaddb {{.*}}, %ymm0, %ymm0
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT: retq
entry:
; add forces execution domain
@@ -87,19 +115,19 @@
define <4 x i64> @E2i(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: E2i:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1]
; AVX1-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: E2i:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpbroadcastq {{.*}}, %ymm2
; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
entry:
; add forces execution domain
@@ -111,19 +139,17 @@
define <8 x i32> @E3i(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: E3i:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1]
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: E3i:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpbroadcastd {{.*}}, %ymm2
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
entry:
; add forces execution domain
@@ -135,18 +161,16 @@
define <16 x i16> @E4i(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: E4i:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1]
; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: E4i:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpaddw {{.*}}, %ymm0, %ymm0
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1]
; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
entry:
; add forces execution domain
@@ -158,20 +182,20 @@
define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: E5i:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vmovaps (%rdi), %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[0,1],ymm0[0,1]
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovapd (%rsi), %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: E5i:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vpaddw {{.*}}, %ymm0, %ymm0
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = mem[0,1],ymm0[0,1]
; AVX2-NEXT: vmovdqa (%rsi), %ymm1
; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
entry:
%c = load <16 x i16>* %a
@@ -184,10 +208,19 @@ entry:
;;;; Cases with undef indices mixed in the mask

define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: F:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT: retq
; AVX1-LABEL: F:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: F:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,1]
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
; AVX2-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
ret <8 x float> %shuffle
@@ -199,17 +232,15 @@ define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone
; AVX1-LABEL: G:
; AVX1: ## BB#0: ## %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: G:
; AVX2: ## BB#0: ## %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
; AVX2-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
54 changes: 0 additions & 54 deletions llvm/test/CodeGen/X86/avx-vpermil.ll

This file was deleted.

157 changes: 0 additions & 157 deletions llvm/test/CodeGen/X86/avx-vshufp.ll

This file was deleted.

57 changes: 0 additions & 57 deletions llvm/test/CodeGen/X86/avx2-palignr.ll

This file was deleted.

127 changes: 0 additions & 127 deletions llvm/test/CodeGen/X86/avx2-shuffle.ll

This file was deleted.

86 changes: 0 additions & 86 deletions llvm/test/CodeGen/X86/avx2-unpack.ll

This file was deleted.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -317,7 +317,7 @@ define <4 x double> @_inreg4xdouble(<4 x double> %a) {
}

;CHECK-LABEL: _inreg2xdouble:
;CHECK: vpbroadcastq
;CHECK: vunpcklpd
;CHECK: ret
define <2 x double> @_inreg2xdouble(<2 x double> %a) {
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/X86/avx512-arith.ll
@@ -453,7 +453,10 @@ entry:
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: vmovq (%rdi), %xmm1
; CHECK-NEXT: vpbroadcastq %xmm1, %ymm1
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
%a = load i64* %ap, align 8
11 changes: 5 additions & 6 deletions llvm/test/CodeGen/X86/avx512-build-vector.ll
@@ -4,11 +4,10 @@ define <16 x i32> @test1(i32* %x) {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd (%rdi), %xmm0
; CHECK-NEXT: vmovdqa32 {{.*}}(%rip), %zmm1
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpermt2d %zmm0, %zmm1, %zmm2
; CHECK-NEXT: vmovdqa32 {{.*}}(%rip), %zmm0
; CHECK-NEXT: vpermd %zmm2, %zmm0, %zmm0
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4],ymm1[5,6,7]
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%y = load i32* %x, align 4
%res = insertelement <16 x i32>zeroinitializer, i32 %y, i32 4
@@ -27,7 +26,7 @@ define <16 x i32> @test2(<16 x i32> %x) {
define <16 x float> @test3(<4 x float> %a) {
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovhlps %xmm0, %xmm0, %xmm1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vmovss %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vmovss %xmm1, %xmm2, %xmm1
362 changes: 0 additions & 362 deletions llvm/test/CodeGen/X86/avx512-shuffle.ll

This file was deleted.

22 changes: 16 additions & 6 deletions llvm/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -3,7 +3,9 @@
define <16 x i32> @_inreg16xi32(i32 %a) {
; CHECK-LABEL: _inreg16xi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastd %edi, %zmm0
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = insertelement <16 x i32> undef, i32 %a, i32 0
%c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -13,7 +15,9 @@ define <16 x i32> @_inreg16xi32(i32 %a) {
define <8 x i64> @_inreg8xi64(i64 %a) {
; CHECK-LABEL: _inreg8xi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
; CHECK-NEXT: vmovq %rdi, %xmm0
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = insertelement <8 x i64> undef, i64 %a, i32 0
%c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -23,7 +27,9 @@ define <8 x i64> @_inreg8xi64(i64 %a) {
define <16 x float> @_inreg16xfloat(float %a) {
; CHECK-LABEL: _inreg16xfloat:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: ## kill: XMM0<def> XMM0<kill> ZMM0<def>
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = insertelement <16 x float> undef, float %a, i32 0
%c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
@@ -33,7 +39,9 @@ define <16 x float> @_inreg16xfloat(float %a) {
define <8 x double> @_inreg8xdouble(double %a) {
; CHECK-LABEL: _inreg8xdouble:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT: ## kill: XMM0<def> XMM0<kill> ZMM0<def>
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = insertelement <8 x double> undef, double %a, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
@@ -43,7 +51,8 @@ define <8 x double> @_inreg8xdouble(double %a) {
define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
; CHECK-LABEL: _xmm16xi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
ret <16 x i32> %b
@@ -52,7 +61,8 @@ define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
define <16 x float> @_xmm16xfloat(<16 x float> %a) {
; CHECK-LABEL: _xmm16xfloat:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: retq
%b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
ret <16 x float> %b
24 changes: 18 additions & 6 deletions llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -312,7 +312,10 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-LABEL: test24:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vmovq (%rdi), %xmm2
; CHECK-NEXT: vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -327,7 +330,10 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test25:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vmovd (%rdi), %xmm2
; CHECK-NEXT: vpbroadcastd %xmm2, %ymm2
; CHECK-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpled %zmm2, %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -342,8 +348,11 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test26:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovd (%rdi), %xmm3
; CHECK-NEXT: vpbroadcastd %xmm3, %ymm3
; CHECK-NEXT: vinserti64x4 $1, %ymm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpgtd %zmm3, %zmm0, %k1
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -360,8 +369,11 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test27:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovq (%rdi), %xmm3
; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT: vinserti64x4 $1, %ymm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpleq %zmm3, %zmm0, %k1
; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
68 changes: 37 additions & 31 deletions llvm/test/CodeGen/X86/combine-or.ll
@@ -7,8 +7,7 @@
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
@@ -20,7 +19,8 @@ define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
@@ -32,7 +32,8 @@ define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
@@ -44,8 +45,8 @@ define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: movss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
@@ -57,7 +58,7 @@ define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test5:
; CHECK: # BB#0:
; CHECK-NEXT: movss %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
@@ -69,7 +70,7 @@ define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test6:
; CHECK: # BB#0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
@@ -81,7 +82,7 @@ define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test7:
; CHECK: # BB#0:
; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
@@ -93,8 +94,7 @@ define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test8:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
%and1 = and <2 x i64> %a, <i64 -1, i64 0>
%and2 = and <2 x i64> %b, <i64 0, i64 -1>
@@ -106,7 +106,8 @@ define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
%and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
@@ -118,7 +119,8 @@ define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%and1 = and <2 x i64> %a, <i64 0, i64 -1>
%and2 = and <2 x i64> %b, <i64 -1, i64 0>
@@ -130,8 +132,8 @@ define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK: # BB#0:
; CHECK-NEXT: movss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
@@ -143,7 +145,7 @@ define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test12:
; CHECK: # BB#0:
; CHECK-NEXT: movss %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
%and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
@@ -211,10 +213,11 @@ define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,0]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0]
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; CHECK-NEXT: orps %xmm1, %xmm2
; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
@@ -228,10 +231,10 @@ define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: xorps %xmm3, %xmm3
; CHECK-NEXT: blendps $1, %xmm0, %xmm3
; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[0,0]
; CHECK-NEXT: blendps $1, %xmm1, %xmm2
; CHECK-NEXT: orps %xmm3, %xmm2
; CHECK-NEXT: blendps {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,1,1]
; CHECK-NEXT: blendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; CHECK-NEXT: orps %xmm0, %xmm2
; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
@@ -245,12 +248,13 @@ define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,0]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: pslldq $8, %xmm2
; CHECK-NEXT: xorps %xmm3, %xmm3
; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm0[0,3]
; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[0,0]
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,2]
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: orps %xmm3, %xmm2
; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
@@ -275,8 +279,9 @@ define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test21:
; CHECK: # BB#0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pslldq $8, %xmm0
; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: movq %xmm0, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
@@ -290,7 +295,8 @@ define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK: # BB#0:
; CHECK-NEXT: movsd %xmm1, %xmm0
; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/X86/exedepsfix-broadcast.ll
@@ -95,8 +95,9 @@ define <4 x double> @ExeDepsFix_broadcastsd256(<4 x double> %arg, <4 x double> %
; CHECK-LABEL: ExeDepsFix_broadcastsd_inreg
; ExeDepsFix works top down, thus it coalesces vpunpcklqdq domain with
; vpand and there is nothing more you can do to match vmaxpd.
; CHECK: vmovlhps
; CHECK: vandps
; CHECK: vmovq
; CHECK: vpbroadcastq
; CHECK: vpand
; CHECK: vmaxpd
; CHECK: ret
define <2 x double> @ExeDepsFix_broadcastsd_inreg(<2 x double> %arg, <2 x double> %arg2, i64 %broadcastvalue) {
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/extractelement-load.ll
@@ -36,9 +36,9 @@ define void @t3() {
;
; This movs the entire vector, shuffling the high double down. If we fixed the
; FIXME above it would just move the high double directly.
; CHECK: movups
; CHECK: movhlps
; CHECK: movlps
; CHECK: movupd
; CHECK: shufpd
; CHECK: movlpd

bb:
%tmp13 = load <2 x double>* undef, align 1
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/fp-load-trunc.ll
@@ -51,7 +51,7 @@ define <4 x float> @test3(<4 x double>* %p) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm1
; CHECK-NEXT: cvtpd2ps (%eax), %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retl
;
; AVX-LABEL: test3:
@@ -70,10 +70,10 @@ define <8 x float> @test4(<8 x double>* %p) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm1
; CHECK-NEXT: cvtpd2ps (%eax), %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: cvtpd2ps 48(%eax), %xmm2
; CHECK-NEXT: cvtpd2ps 32(%eax), %xmm1
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: retl
;
; AVX-LABEL: test4:
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/fp-trunc.ll
@@ -44,7 +44,7 @@ define <4 x float> @test3(<4 x double> %x) nounwind {
; CHECK: # BB#0:
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retl
;
; AVX-LABEL: test3:
@@ -61,10 +61,10 @@ define <8 x float> @test4(<8 x double> %x) nounwind {
; CHECK: # BB#0:
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: cvtpd2ps %xmm3, %xmm3
; CHECK-NEXT: cvtpd2ps %xmm2, %xmm1
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT: retl
;
; AVX-LABEL: test4:
113 changes: 57 additions & 56 deletions llvm/test/CodeGen/X86/palignr.ll
@@ -40,7 +40,7 @@ define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
;
; CHECK-YONAH-LABEL: test3:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[0,0]
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; CHECK-YONAH-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
ret <4 x i32> %C
@@ -54,8 +54,8 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
;
; CHECK-YONAH-LABEL: test4:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-YONAH-NEXT: movaps %xmm1, %xmm0
; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-YONAH-NEXT: movapd %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
ret <4 x i32> %C
@@ -64,13 +64,14 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-LABEL: test5:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test5:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-YONAH-NEXT: movaps %xmm1, %xmm0
; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-YONAH-NEXT: movapd %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
%C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
ret <4 x float> %C
@@ -85,15 +86,16 @@ define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
;
; CHECK-YONAH-LABEL: test6:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: movapd %xmm0, %xmm2
; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,2,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,5,6]
; CHECK-YONAH-NEXT: pextrw $3, %xmm0, %eax
; CHECK-YONAH-NEXT: pinsrw $0, %eax, %xmm1
; CHECK-YONAH-NEXT: pextrw $7, %xmm0, %eax
; CHECK-YONAH-NEXT: pinsrw $4, %eax, %xmm1
; CHECK-YONAH-NEXT: movdqa %xmm1, %xmm0
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; CHECK-YONAH-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,0,1,2,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,0,2,1,4,5,6,7]
; CHECK-YONAH-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-YONAH-NEXT: retl
%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
ret <8 x i16> %C
@@ -108,13 +110,15 @@ define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
;
; CHECK-YONAH-LABEL: test7:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,0,0,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
; CHECK-YONAH-NEXT: movd %xmm1, %eax
; CHECK-YONAH-NEXT: pinsrw $3, %eax, %xmm0
; CHECK-YONAH-NEXT: pextrw $4, %xmm1, %eax
; CHECK-YONAH-NEXT: pinsrw $7, %eax, %xmm0
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-YONAH-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,1,4,5,6,7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7]
; CHECK-YONAH-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-YONAH-NEXT: retl
%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
ret <8 x i16> %C
@@ -129,35 +133,33 @@ define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
;
; CHECK-YONAH-LABEL: test8:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: pushl %esi
; CHECK-YONAH-NEXT: pxor %xmm3, %xmm3
; CHECK-YONAH-NEXT: movdqa %xmm0, %xmm2
; CHECK-YONAH-NEXT: pextrw $4, %xmm2, %eax
; CHECK-YONAH-NEXT: pextrw $5, %xmm2, %ecx
; CHECK-YONAH-NEXT: shrdw $8, %cx, %ax
; CHECK-YONAH-NEXT: pextrw $2, %xmm2, %edx
; CHECK-YONAH-NEXT: pextrw $3, %xmm2, %esi
; CHECK-YONAH-NEXT: shrdw $8, %si, %dx
; CHECK-YONAH-NEXT: # kill: XMM0<def> XMM2<kill>
; CHECK-YONAH-NEXT: pinsrw $0, %edx, %xmm0
; CHECK-YONAH-NEXT: shrl $8, %esi
; CHECK-YONAH-NEXT: pinsrw $1, %esi, %xmm0
; CHECK-YONAH-NEXT: pinsrw $2, %eax, %xmm0
; CHECK-YONAH-NEXT: pextrw $6, %xmm2, %eax
; CHECK-YONAH-NEXT: shrdw $8, %ax, %cx
; CHECK-YONAH-NEXT: pinsrw $3, %ecx, %xmm0
; CHECK-YONAH-NEXT: pextrw $7, %xmm2, %ecx
; CHECK-YONAH-NEXT: shrdw $8, %cx, %ax
; CHECK-YONAH-NEXT: pinsrw $4, %eax, %xmm0
; CHECK-YONAH-NEXT: pextrw $8, %xmm1, %eax
; CHECK-YONAH-NEXT: shrdw $8, %ax, %cx
; CHECK-YONAH-NEXT: pinsrw $5, %ecx, %xmm0
; CHECK-YONAH-NEXT: pextrw $9, %xmm1, %ecx
; CHECK-YONAH-NEXT: shrdw $8, %cx, %ax
; CHECK-YONAH-NEXT: pinsrw $6, %eax, %xmm0
; CHECK-YONAH-NEXT: pextrw $10, %xmm1, %eax
; CHECK-YONAH-NEXT: shldw $8, %cx, %ax
; CHECK-YONAH-NEXT: pinsrw $7, %eax, %xmm0
; CHECK-YONAH-NEXT: popl %esi
; CHECK-YONAH-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,3,4,5,6,7]
; CHECK-YONAH-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,1,2,0]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,1,2,3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,2,3,0,4,5,6,7]
; CHECK-YONAH-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,3,4,5,6,7]
; CHECK-YONAH-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,1]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,2,2,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,4]
; CHECK-YONAH-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-YONAH-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-YONAH-NEXT: packuswb %xmm0, %xmm2
; CHECK-YONAH-NEXT: movdqa %xmm2, %xmm0
; CHECK-YONAH-NEXT: retl
%C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
ret <16 x i8> %C
@@ -170,18 +172,17 @@ define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test9:
; CHECK: # BB#0:
; CHECK-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,xmm1[4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test9:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: pextrw $4, %xmm1, %eax
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,3,0,4,5,6,7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
; CHECK-YONAH-NEXT: pinsrw $3, %eax, %xmm0
; CHECK-YONAH-NEXT: movd %xmm1, %eax
; CHECK-YONAH-NEXT: pinsrw $7, %eax, %xmm0
; CHECK-YONAH-NEXT: retl
%C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
ret <8 x i16> %C
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/pmul.ll
@@ -4,11 +4,11 @@
define <4 x i32> @a(<4 x i32> %i) nounwind {
; SSE2-LABEL: a:
; SSE2: movdqa {{.*}}, %[[X1:xmm[0-9]+]]
; SSE2-NEXT: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,0,3,0]
; SSE2-NEXT: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %[[X1]], %xmm0
; SSE2-NEXT: pmuludq %[[X1]], %[[X2]]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: a:
@@ -31,12 +31,12 @@ entry:

define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE2-LABEL: c:
; SSE2: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,0,3,0]
; SSE2: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[1,0,3,0]
; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %[[X2]], %xmm1
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: c:
@@ -61,14 +61,14 @@ declare void @foo()

define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE2-LABEL: e:
; SSE2: movdqa {{[0-9]*}}(%rsp), %[[X1:xmm[0-9]+]]
; SSE2-NEXT: pshufd {{.*}} # xmm0 = [[X2]][1,0,3,0]
; SSE2: movdqa {{[0-9]*}}(%rsp), %xmm0
; SSE2-NEXT: pshufd {{.*}} # [[X1:xmm[0-9]+]] = xmm0[1,1,3,3]
; SSE2-NEXT: movdqa {{[0-9]*}}(%rsp), %[[X2:xmm[0-9]+]]
; SSE2-NEXT: pmuludq %[[X2]], %[[X1]]
; SSE2-NEXT: pshufd {{.*}} # [[X2]] = [[X2]][1,0,3,0]
; SSE2-NEXT: pmuludq %xmm0, %[[X2]]
; SSE2-NEXT: shufps {{.*}} # [[X1]] = [[X1]][0,2],[[X2]][0,2]
; SSE2-NEXT: pshufd {{.*}} # xmm0 = [[X1]][0,2,1,3]
; SSE2-NEXT: pmuludq %[[X2]], %xmm0
; SSE2-NEXT: pshufd {{.*}} # [[X2]] = [[X2]][1,1,3,3]
; SSE2-NEXT: pmuludq %[[X1]], %[[X2]]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: addq ${{[0-9]+}}, %rsp
; SSE2-NEXT: retq
;
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/pr11334.ll
@@ -15,7 +15,7 @@ define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind {
entry:
; CHECK: v3f2d_ext_vec
; CHECK: cvtps2pd
; CHECK: movhlps
; CHECK: shufpd
; CHECK: cvtps2pd
; AVX: v3f2d_ext_vec
; AVX: vcvtps2pd
@@ -28,7 +28,7 @@ define <4 x double> @v4f2d_ext_vec(<4 x float> %v1) nounwind {
entry:
; CHECK: v4f2d_ext_vec
; CHECK: cvtps2pd
; CHECK: movhlps
; CHECK: shufpd
; CHECK: cvtps2pd
; AVX: v4f2d_ext_vec
; AVX: vcvtps2pd
@@ -42,9 +42,9 @@
; CHECK: v8f2d_ext_vec
; CHECK: cvtps2pd
; CHECK: cvtps2pd
; CHECK: movhlps
; CHECK: shufpd
; CHECK: cvtps2pd
; CHECK: movhlps
; CHECK: shufpd
; CHECK: cvtps2pd
; AVX: v8f2d_ext_vec
; AVX: vcvtps2pd
10 changes: 0 additions & 10 deletions llvm/test/CodeGen/X86/pr12359.ll

This file was deleted.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/sincos-opt.ll
@@ -15,7 +15,7 @@ entry:

; OSX_SINCOS-LABEL: test1:
; OSX_SINCOS: callq ___sincosf_stret
; OSX_SINCOS: pshufd $1, %xmm0, %xmm1
; OSX_SINCOS: pshufd {{.*}} ## xmm1 = xmm0[1,1,2,3]
; OSX_SINCOS: addss %xmm0, %xmm1

; OSX_NOOPT: test1
17 changes: 0 additions & 17 deletions llvm/test/CodeGen/X86/splat-scalar-load.ll

This file was deleted.

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/sse-align-12.ll
@@ -3,8 +3,8 @@
define <4 x float> @a(<4 x float>* %y) nounwind {
; CHECK-LABEL: a:
; CHECK: # BB#0:
; CHECK-NEXT: movdqu (%rdi), %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: movups (%rdi), %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: retq
%x = load <4 x float>* %y, align 4
%a = extractelement <4 x float> %x, i32 0
3 changes: 0 additions & 3 deletions llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll
@@ -1,9 +1,6 @@
; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
; RUN: llc -mcpu=x86-64 -mattr=+avx < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=AVX %s

target triple = "x86_64-unknown-unknown"

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/sse1.ll
@@ -15,9 +15,9 @@ define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind {
; CHECK-LABEL: test4:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movaps %xmm0, %xmm2
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0,0,0]
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0,0,0]
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; CHECK-NEXT: subss %xmm1, %xmm2
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: ret
14 changes: 0 additions & 14 deletions llvm/test/CodeGen/X86/sse2-mul.ll

This file was deleted.
