48 changes: 24 additions & 24 deletions llvm/test/CodeGen/X86/setcc-wide-types.ll
@@ -75,16 +75,16 @@ define i32 @eq_i128(<2 x i64> %x, <2 x i64> %y) {
define i32 @ne_i256(<4 x i64> %x, <4 x i64> %y) {
; SSE2-LABEL: ne_i256:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm4, %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm4, %rcx
; SSE2-NEXT: movq %xmm0, %rdx
; SSE2-NEXT: movq %xmm1, %r8
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rdi
; SSE2-NEXT: xorq %rax, %rdi
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rsi
; SSE2-NEXT: xorq %rcx, %rsi
; SSE2-NEXT: orq %rdi, %rsi
@@ -155,16 +155,16 @@ define i32 @ne_i256(<4 x i64> %x, <4 x i64> %y) {
define i32 @eq_i256(<4 x i64> %x, <4 x i64> %y) {
; SSE2-LABEL: eq_i256:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm4, %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm4, %rcx
; SSE2-NEXT: movq %xmm0, %rdx
; SSE2-NEXT: movq %xmm1, %r8
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rdi
; SSE2-NEXT: xorq %rax, %rdi
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rsi
; SSE2-NEXT: xorq %rcx, %rsi
; SSE2-NEXT: orq %rdi, %rsi
@@ -235,28 +235,28 @@ define i32 @eq_i256(<4 x i64> %x, <4 x i64> %y) {
define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) {
; SSE2-LABEL: ne_i512:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm8, %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,2,3]
; SSE2-NEXT: movq %xmm8, %rcx
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm8, %rdx
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,2,3]
; SSE2-NEXT: movq %xmm8, %rsi
; SSE2-NEXT: movq %xmm0, %r11
; SSE2-NEXT: movq %xmm2, %r8
; SSE2-NEXT: movq %xmm1, %r9
; SSE2-NEXT: movq %xmm3, %r10
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rdi
; SSE2-NEXT: xorq %rax, %rdi
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorq %rcx, %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: xorq %rdx, %rcx
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rdx
; SSE2-NEXT: xorq %rsi, %rdx
; SSE2-NEXT: orq %rcx, %rdx
@@ -426,28 +426,28 @@ define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) {
define i32 @eq_i512(<8 x i64> %x, <8 x i64> %y) {
; SSE2-LABEL: eq_i512:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm8, %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm2[2,3,2,3]
; SSE2-NEXT: movq %xmm8, %rcx
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm8, %rdx
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,2,3]
; SSE2-NEXT: movq %xmm8, %rsi
; SSE2-NEXT: movq %xmm0, %r11
; SSE2-NEXT: movq %xmm2, %r8
; SSE2-NEXT: movq %xmm1, %r9
; SSE2-NEXT: movq %xmm3, %r10
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rdi
; SSE2-NEXT: xorq %rax, %rdi
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorq %rcx, %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: xorq %rdx, %rcx
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rdx
; SSE2-NEXT: xorq %rsi, %rdx
; SSE2-NEXT: orq %rcx, %rdx
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -2094,9 +2094,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,0,1]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
; X86-SSE-NEXT: movd %xmm7, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,0,1]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
; X86-SSE-NEXT: movd %xmm7, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
@@ -2137,9 +2137,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; X86-SSE-NEXT: movd %xmm2, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-SSE-NEXT: movd %xmm1, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
@@ -2336,9 +2336,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm3
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,0,1]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
; X64-SSE-NEXT: movd %xmm7, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,0,1]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
; X64-SSE-NEXT: movd %xmm7, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
@@ -2379,9 +2379,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm4
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; X64-SSE-NEXT: movd %xmm2, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X64-SSE-NEXT: movd %xmm1, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
80 changes: 40 additions & 40 deletions llvm/test/CodeGen/X86/slow-pmulld.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/smul_fix_sat.ll
@@ -206,10 +206,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X64-NEXT: cmovll %ecx, %edx
; X64-NEXT: movd %edx, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: movd %xmm3, %edx
; X64-NEXT: movslq %edx, %rdx
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; X64-NEXT: movd %xmm3, %esi
; X64-NEXT: movslq %esi, %rsi
; X64-NEXT: imulq %rdx, %rsi
@@ -476,9 +476,9 @@ define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF
; X64-NEXT: imull %edx, %ecx
; X64-NEXT: cmovol %edi, %ecx
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X64-NEXT: movd %xmm2, %edx
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-NEXT: movd %xmm2, %esi
; X64-NEXT: movl %esi, %edi
; X64-NEXT: imull %edx, %edi
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/split-extend-vector-inreg.ll
@@ -5,7 +5,7 @@
define <4 x i64> @autogen_SD88863() {
; CHECK-LABEL: autogen_SD88863:
; CHECK: # %bb.0: # %BB
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/split-vector-rem.ll
@@ -12,9 +12,9 @@ define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) {
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
; CHECK-NEXT: movd %xmm5, %eax
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
; CHECK-NEXT: movd %xmm5, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
@@ -41,9 +41,9 @@ define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) {
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: movd %edx, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
; CHECK-NEXT: movd %xmm4, %eax
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
; CHECK-NEXT: movd %xmm4, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
@@ -79,9 +79,9 @@ define <8 x i32> @bar(<8 x i32> %t, <8 x i32> %u) {
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
; CHECK-NEXT: movd %xmm5, %eax
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
; CHECK-NEXT: movd %xmm5, %ecx
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
@@ -108,9 +108,9 @@ define <8 x i32> @bar(<8 x i32> %t, <8 x i32> %u) {
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
; CHECK-NEXT: movd %edx, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
; CHECK-NEXT: movd %xmm4, %eax
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
; CHECK-NEXT: movd %xmm4, %ecx
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -2791,8 +2791,8 @@ define void @test_mm_storeh_pi(x86_mmx *%a0, <4 x float> %a1) nounwind {
;
; X64-SSE2-LABEL: test_mm_storeh_pi:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pshufd $78, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x4e]
; X64-SSE2-NEXT: # xmm0 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: punpckhqdq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x6d,0xc0]
; X64-SSE2-NEXT: # xmm0 = xmm0[1,1]
; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-SSE2-NEXT: retq # encoding: [0xc3]
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/sse41.ll
@@ -692,24 +692,24 @@ entry:
define <4 x i32> @insertps_from_shufflevector_i32_2(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: insertps_from_shufflevector_i32_2:
; SSE: ## %bb.0: ## %entry
; SSE-NEXT: pshufd $78, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0x4e]
; SSE-NEXT: ## xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: pshufd $238, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0xee]
; SSE-NEXT: ## xmm1 = xmm1[2,3,2,3]
; SSE-NEXT: pblendw $12, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0e,0xc1,0x0c]
; SSE-NEXT: ## xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: insertps_from_shufflevector_i32_2:
; AVX1: ## %bb.0: ## %entry
; AVX1-NEXT: vpermilps $78, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
; AVX1-NEXT: ## xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpermilps $238, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; AVX1-NEXT: ## xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: insertps_from_shufflevector_i32_2:
; AVX512: ## %bb.0: ## %entry
; AVX512-NEXT: vpermilps $78, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
; AVX512-NEXT: ## xmm1 = xmm1[2,3,0,1]
; AVX512-NEXT: vpermilps $238, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; AVX512-NEXT: ## xmm1 = xmm1[2,3,2,3]
; AVX512-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
@@ -1875,8 +1875,8 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32*
; X86-SSE-LABEL: insertps_pr20411:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: pshufd $78, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0x4e]
; X86-SSE-NEXT: ## xmm1 = xmm1[2,3,0,1]
; X86-SSE-NEXT: pshufd $238, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0xee]
; X86-SSE-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X86-SSE-NEXT: pblendw $243, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x0e,0xc8,0xf3]
; X86-SSE-NEXT: ## xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X86-SSE-NEXT: movdqu %xmm1, (%eax) ## encoding: [0xf3,0x0f,0x7f,0x08]
@@ -1885,8 +1885,8 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32*
; X86-AVX1-LABEL: insertps_pr20411:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $78, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
; X86-AVX1-NEXT: ## xmm1 = xmm1[2,3,0,1]
; X86-AVX1-NEXT: vpermilps $238, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; X86-AVX1-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X86-AVX1-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; X86-AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X86-AVX1-NEXT: vmovups %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x11,0x00]
@@ -1895,35 +1895,35 @@ define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32*
; X86-AVX512-LABEL: insertps_pr20411:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpermilps $78, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
; X86-AVX512-NEXT: ## xmm1 = xmm1[2,3,0,1]
; X86-AVX512-NEXT: vpermilps $238, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; X86-AVX512-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X86-AVX512-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; X86-AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X86-AVX512-NEXT: vmovups %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: insertps_pr20411:
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: pshufd $78, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0x4e]
; X64-SSE-NEXT: ## xmm1 = xmm1[2,3,0,1]
; X64-SSE-NEXT: pshufd $238, %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x70,0xc9,0xee]
; X64-SSE-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X64-SSE-NEXT: pblendw $243, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x0e,0xc8,0xf3]
; X64-SSE-NEXT: ## xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-SSE-NEXT: movdqu %xmm1, (%rdi) ## encoding: [0xf3,0x0f,0x7f,0x0f]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: insertps_pr20411:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpermilps $78, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
; X64-AVX1-NEXT: ## xmm1 = xmm1[2,3,0,1]
; X64-AVX1-NEXT: vpermilps $238, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; X64-AVX1-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X64-AVX1-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; X64-AVX1-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: insertps_pr20411:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpermilps $78, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0x4e]
; X64-AVX512-NEXT: ## xmm1 = xmm1[2,3,0,1]
; X64-AVX512-NEXT: vpermilps $238, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc9,0xee]
; X64-AVX512-NEXT: ## xmm1 = xmm1[2,3,2,3]
; X64-AVX512-NEXT: vblendps $2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x02]
; X64-AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/trunc-subvector.ll
@@ -73,7 +73,7 @@ define <2 x i32> @test4(<8 x i32> %v) {
define <2 x i32> @test5(<8 x i32> %v) {
; SSE2-LABEL: test5:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,2,2]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
@@ -175,7 +175,7 @@ define <2 x i32> @test9(<8 x i32> %v) {
define <2 x i32> @test10(<8 x i32> %v) {
; SSE2-LABEL: test10:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,2,2]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/udiv_fix.ll
@@ -248,9 +248,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
; X64-NEXT: movq %rax, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; X64-NEXT: movq %xmm4, %rcx
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,2,3]
; X64-NEXT: movq %xmm4, %rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
@@ -264,9 +264,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
; X64-NEXT: movq %rax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X64-NEXT: movq %xmm1, %rcx
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NEXT: movq %xmm0, %rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -335,9 +335,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
; X64-NEXT: movq %rax, %xmm7
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; X64-NEXT: movq %xmm2, %rcx
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,2,3]
; X64-NEXT: movq %xmm2, %rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
@@ -369,9 +369,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
; X64-NEXT: movq %rax, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X64-NEXT: movq %xmm1, %rcx
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NEXT: movq %xmm0, %rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/uint_to_fp-3.ll
@@ -40,7 +40,7 @@ define <4 x double> @mask_ucvt_4i32_4f64(<4 x i32> %a) {
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm2
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm1
; X32-SSE-NEXT: movaps %xmm2, %xmm0
; X32-SSE-NEXT: retl
@@ -55,7 +55,7 @@ define <4 x double> @mask_ucvt_4i32_4f64(<4 x i32> %a) {
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm2
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm1
; X64-SSE-NEXT: movaps %xmm2, %xmm0
; X64-SSE-NEXT: retq
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -151,9 +151,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmoval %eax, %ecx
; X64-NEXT: movd %ecx, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: movd %xmm3, %ecx
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; X64-NEXT: movd %xmm3, %edx
; X64-NEXT: imulq %rcx, %rdx
; X64-NEXT: movq %rdx, %rcx
@@ -361,9 +361,9 @@ define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: cmovol %ecx, %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; X64-NEXT: movd %xmm3, %eax
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: movd %xmm3, %edx
; X64-NEXT: mull %edx
; X64-NEXT: cmovol %ecx, %eax
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
@@ -2411,7 +2411,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: psrld $2, %xmm2
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm3[2,1]
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm3[2,3]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,16,1]
; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
@@ -2516,7 +2516,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: psrld $2, %xmm2
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm3[2,1]
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm3[2,3]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [14,4294967295,16,1]
; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/var-permute-128.ll
@@ -16,7 +16,7 @@ define <2 x i64> @var_shuffle_v2i64(<2 x i64> %v, <2 x i64> %indices) nounwind {
; SSE3: # %bb.0:
; SSE3-NEXT: movq %xmm1, %rax
; SSE3-NEXT: andl $1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE3-NEXT: movq %xmm1, %rcx
; SSE3-NEXT: andl $1, %ecx
; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
@@ -29,7 +29,7 @@ define <2 x i64> @var_shuffle_v2i64(<2 x i64> %v, <2 x i64> %indices) nounwind {
; SSSE3: # %bb.0:
; SSSE3-NEXT: movq %xmm1, %rax
; SSSE3-NEXT: andl $1, %eax
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSSE3-NEXT: movq %xmm1, %rcx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
@@ -69,7 +69,7 @@ define <4 x i32> @var_shuffle_v4i32(<4 x i32> %v, <4 x i32> %indices) nounwind {
; SSE3-NEXT: movd %xmm1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
; SSE3-NEXT: movd %xmm2, %ecx
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; SSE3-NEXT: movd %xmm2, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE3-NEXT: movd %xmm1, %esi
@@ -379,7 +379,7 @@ define <2 x double> @var_shuffle_v2f64(<2 x double> %v, <2 x i64> %indices) noun
; SSE3: # %bb.0:
; SSE3-NEXT: movq %xmm1, %rax
; SSE3-NEXT: andl $1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE3-NEXT: movq %xmm1, %rcx
; SSE3-NEXT: andl $1, %ecx
; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
@@ -391,7 +391,7 @@ define <2 x double> @var_shuffle_v2f64(<2 x double> %v, <2 x i64> %indices) noun
; SSSE3: # %bb.0:
; SSSE3-NEXT: movq %xmm1, %rax
; SSSE3-NEXT: andl $1, %eax
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSSE3-NEXT: movq %xmm1, %rcx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
@@ -430,7 +430,7 @@ define <4 x float> @var_shuffle_v4f32(<4 x float> %v, <4 x i32> %indices) nounwi
; SSE3-NEXT: movd %xmm1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
; SSE3-NEXT: movd %xmm2, %ecx
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; SSE3-NEXT: movd %xmm2, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE3-NEXT: movd %xmm1, %esi
34 changes: 17 additions & 17 deletions llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
@@ -135,7 +135,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fstps (%esp)
@@ -154,7 +154,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: cvtsi2ss %rax, %xmm1
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: xorps %xmm0, %xmm0
; SSE-64-NEXT: cvtsi2ss %rax, %xmm0
@@ -172,7 +172,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-32-NEXT: andl $-8, %esp
; SSE41-32-NEXT: subl $24, %esp
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fstps (%esp)
@@ -191,7 +191,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: xorps %xmm0, %xmm0
; SSE41-64-NEXT: cvtsi2ss %rax, %xmm0
@@ -209,7 +209,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $24, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstps (%esp)
@@ -236,7 +236,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT: vcvtqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT: vzeroupper
@@ -271,7 +271,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
@@ -313,7 +313,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: addss %xmm0, %xmm0
; SSE-64-NEXT: .LBB3_2:
; SSE-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-64-NEXT: movq %xmm1, %rax
; SSE-64-NEXT: movq %rax, %rcx
; SSE-64-NEXT: shrq %rcx
@@ -340,7 +340,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-32-NEXT: .cfi_def_cfa_register %ebp
; SSE41-32-NEXT: andl $-8, %esp
; SSE41-32-NEXT: subl $24, %esp
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE41-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
@@ -382,7 +382,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-64-NEXT: # %bb.1:
; SSE41-64-NEXT: addss %xmm0, %xmm0
; SSE41-64-NEXT: .LBB3_2:
; SSE41-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE41-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-64-NEXT: movq %xmm1, %rax
; SSE41-64-NEXT: movq %rax, %rcx
; SSE41-64-NEXT: shrq %rcx
@@ -410,7 +410,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $24, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
@@ -471,7 +471,7 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT: vcvtuqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX512DQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT: vzeroupper
@@ -1146,7 +1146,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $32, %esp
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fstpl {{[0-9]+}}(%esp)
@@ -1164,7 +1164,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: cvtsi2sd %rax, %xmm1
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: xorps %xmm0, %xmm0
; SSE-64-NEXT: cvtsi2sd %rax, %xmm0
@@ -1182,7 +1182,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE41-32-NEXT: andl $-8, %esp
; SSE41-32-NEXT: subl $32, %esp
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fstpl {{[0-9]+}}(%esp)
@@ -1200,7 +1200,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: cvtsi2sd %rax, %xmm1
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: xorps %xmm0, %xmm0
; SSE41-64-NEXT: cvtsi2sd %rax, %xmm0
@@ -1218,7 +1218,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
@@ -281,7 +281,7 @@ define <8 x float> @sitofp_v8i16_v8f32(<8 x i16> %x) #0 {
; AVX1-LABEL: sitofp_v8i16_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vec_cast2.ll
@@ -18,7 +18,7 @@ define <8 x float> @cvt_v8i16_v8f32(<8 x i16> %src) {
; CHECK-LABEL: cvt_v8i16_v8f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
120 changes: 60 additions & 60 deletions llvm/test/CodeGen/X86/vec_int_to_fp.ll

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions llvm/test/CodeGen/X86/vec_saddo.ll
@@ -107,7 +107,7 @@ define <3 x i32> @saddo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movq %xmm1, (%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: movd %xmm1, 8(%rdi)
; SSE2-NEXT: retq
;
@@ -119,7 +119,7 @@ define <3 x i32> @saddo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSSE3-NEXT: pxor %xmm2, %xmm0
; SSSE3-NEXT: movq %xmm1, (%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSSE3-NEXT: movd %xmm1, 8(%rdi)
; SSSE3-NEXT: retq
;
@@ -512,13 +512,13 @@ define <16 x i32> @saddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpacksswb %xmm1, %xmm4, %xmm0
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm5
; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm5, %ymm0
; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
@@ -644,7 +644,7 @@ define <16 x i32> @saddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm2
; SSE41-NEXT: psrad $31, %xmm2
@@ -667,9 +667,9 @@ define <16 x i32> @saddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vmovdqa %xmm3, (%rdi)
@@ -683,7 +683,7 @@ define <16 x i32> @saddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX2-NEXT: vmovdqa %xmm3, (%rdi)
; AVX2-NEXT: retq
@@ -769,7 +769,7 @@ define <8 x i32> @saddo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
@@ -889,7 +889,7 @@ define <4 x i32> @saddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
; SSE2-NEXT: movd %xmm1, %ecx
; SSE2-NEXT: movw %cx, 9(%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSE2-NEXT: movd %xmm1, %edx
; SSE2-NEXT: movw %dx, 6(%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
@@ -924,7 +924,7 @@ define <4 x i32> @saddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
; SSSE3-NEXT: movd %xmm1, %ecx
; SSSE3-NEXT: movw %cx, 9(%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSSE3-NEXT: movd %xmm1, %edx
; SSSE3-NEXT: movw %dx, 6(%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
136 changes: 68 additions & 68 deletions llvm/test/CodeGen/X86/vec_smulo.ll

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -109,7 +109,7 @@ define <3 x i32> @ssubo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: movq %xmm3, (%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; SSE2-NEXT: movd %xmm1, 8(%rdi)
; SSE2-NEXT: retq
;
@@ -122,7 +122,7 @@ define <3 x i32> @ssubo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: movq %xmm3, (%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; SSSE3-NEXT: movd %xmm1, 8(%rdi)
; SSSE3-NEXT: retq
;
@@ -517,13 +517,13 @@ define <16 x i32> @ssubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpacksswb %xmm1, %xmm4, %xmm0
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm5
; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm5, %ymm0
; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
@@ -649,7 +649,7 @@ define <16 x i32> @ssubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm2
; SSE41-NEXT: psrad $31, %xmm2
@@ -672,9 +672,9 @@ define <16 x i32> @ssubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vmovdqa %xmm3, (%rdi)
@@ -688,7 +688,7 @@ define <16 x i32> @ssubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX2-NEXT: vmovdqa %xmm3, (%rdi)
; AVX2-NEXT: retq
@@ -774,7 +774,7 @@ define <8 x i32> @ssubo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
@@ -899,7 +899,7 @@ define <4 x i32> @ssubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
; SSE2-NEXT: movd %xmm1, %ecx
; SSE2-NEXT: movw %cx, 9(%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSE2-NEXT: movd %xmm1, %edx
; SSE2-NEXT: movw %dx, 6(%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
@@ -934,7 +934,7 @@ define <4 x i32> @ssubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
; SSSE3-NEXT: movd %xmm1, %ecx
; SSSE3-NEXT: movw %cx, 9(%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSSE3-NEXT: movd %xmm1, %edx
; SSSE3-NEXT: movw %dx, 6(%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -120,7 +120,7 @@ define <3 x i32> @uaddo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: movq %xmm1, (%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: movd %xmm1, 8(%rdi)
; SSE2-NEXT: retq
;
@@ -132,7 +132,7 @@ define <3 x i32> @uaddo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; SSSE3-NEXT: pxor %xmm1, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
; SSSE3-NEXT: movq %xmm1, (%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSSE3-NEXT: movd %xmm1, 8(%rdi)
; SSSE3-NEXT: retq
;
@@ -601,13 +601,13 @@ define <16 x i32> @uaddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
; AVX1-NEXT: vpmaxud %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm7, %xmm0, %xmm6
; AVX1-NEXT: vpacksswb %xmm1, %xmm6, %xmm0
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm7
; AVX1-NEXT: vpackssdw %xmm7, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm7, %ymm0
; AVX1-NEXT: vpacksswb %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm6, %ymm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
@@ -727,7 +727,7 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm4
; SSE41-NEXT: psrad $31, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm2
; SSE41-NEXT: psrad $31, %xmm2
@@ -750,9 +750,9 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
@@ -766,7 +766,7 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
; AVX2-NEXT: retq
@@ -850,7 +850,7 @@ define <8 x i32> @uaddo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
@@ -956,7 +956,7 @@ define <4 x i32> @uaddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
; SSE2-NEXT: movd %xmm1, %ecx
; SSE2-NEXT: movw %cx, 9(%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSE2-NEXT: movd %xmm1, %edx
; SSE2-NEXT: movw %dx, 6(%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
@@ -988,7 +988,7 @@ define <4 x i32> @uaddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
; SSSE3-NEXT: movd %xmm1, %ecx
; SSSE3-NEXT: movw %cx, 9(%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSSE3-NEXT: movd %xmm1, %edx
; SSSE3-NEXT: movw %dx, 6(%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
78 changes: 39 additions & 39 deletions llvm/test/CodeGen/X86/vec_umulo.ll
@@ -1008,16 +1008,16 @@ define <16 x i32> @umulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm8, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm9, %xmm5
; AVX1-NEXT: vpackssdw %xmm13, %xmm5, %xmm5
; AVX1-NEXT: vpacksswb %xmm11, %xmm5, %xmm7
; AVX1-NEXT: vpacksswb %xmm11, %xmm5, %xmm5
; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpmulld %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpmulld %xmm10, %xmm12, %xmm6
; AVX1-NEXT: vpmovsxbd %xmm7, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm7[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm5, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm5[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpacksswb %xmm5, %xmm11, %xmm1
; AVX1-NEXT: vpacksswb %xmm11, %xmm11, %xmm1
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
@@ -1217,7 +1217,7 @@ define <16 x i32> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm2
; SSE41-NEXT: psrad $31, %xmm2
Expand Down Expand Up @@ -1254,9 +1254,9 @@ define <16 x i32> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
@@ -1278,7 +1278,7 @@ define <16 x i32> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
; AVX2-NEXT: retq
@@ -1560,7 +1560,7 @@ define <32 x i32> @umulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm6[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm6[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
@@ -1572,7 +1572,7 @@ define <32 x i32> @umulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm3
; SSE41-NEXT: psrad $31, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm4
; SSE41-NEXT: psrad $31, %xmm4
@@ -1647,14 +1647,14 @@ define <32 x i32> @umulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm6[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm6[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vmovdqa %xmm5, 16(%rdi)
@@ -1689,9 +1689,9 @@ define <32 x i32> @umulo_v32i8(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>* %p2) nou
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpmovsxbd %xmm3, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm3, %ymm3
; AVX2-NEXT: vmovdqa %ymm4, (%rdi)
; AVX2-NEXT: retq
@@ -2230,7 +2230,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: movdqa %xmm0, (%rdi)
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm14[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm14[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
@@ -2245,7 +2245,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: movdqa %xmm0, 208(%rdi)
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm12[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm12[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
@@ -2260,7 +2260,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: movdqa %xmm0, 144(%rdi)
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
@@ -2275,7 +2275,7 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: movdqa %xmm0, 80(%rdi)
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm8[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm8[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
@@ -2390,37 +2390,37 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
; AVX1-NEXT: vmovdqa %xmm4, 64(%rdi)
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm4
; AVX1-NEXT: vmovdqa %xmm4, (%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
; AVX1-NEXT: vmovdqa %xmm4, 224(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
; AVX1-NEXT: vmovdqa %xmm4, 240(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vmovdqa %xmm3, 208(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vmovdqa %xmm3, 160(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vmovdqa %xmm3, 176(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT: vmovdqa %xmm2, 144(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT: vmovdqa %xmm2, 96(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT: vmovdqa %xmm2, 112(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vmovdqa %xmm1, 80(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vmovdqa %xmm1, 32(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vmovdqa %xmm1, 48(%rdi)
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
@@ -2475,15 +2475,15 @@ define <64 x i32> @umulo_v64i8(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %p2) nou
; AVX2-NEXT: vpackuswb %ymm7, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm6, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm2, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm6, %ymm6
; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm7, %ymm7
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
; AVX2-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm5, %ymm5
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX2-NEXT: vpmovsxbd %xmm3, %ymm3
@@ -2608,7 +2608,7 @@ define <8 x i32> @umulo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
@@ -2646,9 +2646,9 @@ define <8 x i32> @umulo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) nounwind {
; SSE2-LABEL: umulo_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm2, %r8
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm2, %r10
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: movq %xmm1, %rdx
@@ -2672,9 +2672,9 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) noun
;
; SSSE3-LABEL: umulo_v2i64:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; SSSE3-NEXT: movq %xmm2, %r8
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; SSSE3-NEXT: movq %xmm2, %r10
; SSSE3-NEXT: movq %xmm0, %rax
; SSSE3-NEXT: movq %xmm1, %rdx
@@ -2829,7 +2829,7 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: movw %ax, (%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSE2-NEXT: movd %xmm2, %ecx
; SSE2-NEXT: movw %cx, 6(%rdi)
; SSE2-NEXT: movd %xmm1, %edx
@@ -2873,7 +2873,7 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSSE3-NEXT: por %xmm3, %xmm0
; SSSE3-NEXT: movd %xmm2, %eax
; SSSE3-NEXT: movw %ax, (%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSSE3-NEXT: movd %xmm2, %ecx
; SSSE3-NEXT: movw %cx, 6(%rdi)
; SSSE3-NEXT: movd %xmm1, %edx
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/X86/vec_usubo.ll
@@ -126,7 +126,7 @@ define <3 x i32> @usubo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
; SSE2-NEXT: movq %xmm0, (%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movd %xmm0, 8(%rdi)
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
@@ -140,7 +140,7 @@ define <3 x i32> @usubo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; SSSE3-NEXT: pxor %xmm0, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm2
; SSSE3-NEXT: movq %xmm0, (%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSSE3-NEXT: movd %xmm0, 8(%rdi)
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
@@ -644,13 +644,13 @@ define <16 x i32> @usubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
; AVX1-NEXT: vpminud %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm7, %xmm0, %xmm6
; AVX1-NEXT: vpacksswb %xmm1, %xmm6, %xmm0
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm7
; AVX1-NEXT: vpackssdw %xmm7, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm7, %ymm0
; AVX1-NEXT: vpacksswb %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm6, %ymm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm6
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
@@ -771,7 +771,7 @@ define <16 x i32> @usubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT: pslld $31, %xmm2
; SSE41-NEXT: psrad $31, %xmm2
@@ -793,9 +793,9 @@ define <16 x i32> @usubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,2,3]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
@@ -809,7 +809,7 @@ define <16 x i32> @usubo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
; AVX2-NEXT: retq
@@ -895,7 +895,7 @@ define <8 x i32> @usubo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
@@ -1003,7 +1003,7 @@ define <4 x i32> @usubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
; SSE2-NEXT: movd %xmm1, %ecx
; SSE2-NEXT: movw %cx, 9(%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSE2-NEXT: movd %xmm1, %edx
; SSE2-NEXT: movw %dx, 6(%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
@@ -1035,7 +1035,7 @@ define <4 x i32> @usubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,1,2,3]
; SSSE3-NEXT: movd %xmm1, %ecx
; SSSE3-NEXT: movw %cx, 9(%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSSE3-NEXT: movd %xmm1, %edx
; SSSE3-NEXT: movw %dx, 6(%rdi)
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -6296,7 +6296,7 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: cvtsi2sd %rax, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
@@ -6342,7 +6342,7 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
@@ -6375,7 +6375,7 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
; CHECK-NEXT: movd %xmm1, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2sd %eax, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2sd %eax, %xmm0
@@ -6414,7 +6414,7 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: cvtsi2ss %eax, %xmm2
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2ss %eax, %xmm0
@@ -6535,7 +6535,7 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
@@ -6575,14 +6575,14 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: cvtsi2sd %rax, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: cvtsi2sd %rax, %xmm3
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
@@ -6642,15 +6642,15 @@ define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: cvtsi2ss %rax, %xmm2
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
@@ -6970,7 +6970,7 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm0, %xmm0
; CHECK-NEXT: .LBB174_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
@@ -7031,7 +7031,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
; CHECK-NEXT: movd %xmm1, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsi2sd %rax, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
@@ -7082,7 +7082,7 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: cvtsi2ss %rax, %xmm2
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2ss %rax, %xmm0
@@ -7157,7 +7157,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: vsubpd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
; AVX1-NEXT: vaddpd %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[2,3,2,3]
; AVX1-NEXT: vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; AVX1-NEXT: vsubpd %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpermilpd {{.*#+}} xmm5 = xmm4[1,0]
@@ -7458,7 +7458,7 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm2, %xmm2
; CHECK-NEXT: .LBB182_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
@@ -7487,7 +7487,7 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-NEXT: addss %xmm1, %xmm1
; CHECK-NEXT: .LBB182_6: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -30,15 +30,15 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: psllq %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: psllq %xmm4, %xmm5
; SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [64,64]
; SSE2-NEXT: psubq %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrlq %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; SSE2-NEXT: psrlq %xmm3, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
; SSE2-NEXT: orpd %xmm5, %xmm1
@@ -56,15 +56,15 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
; SSE41-NEXT: psllq %xmm2, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: psllq %xmm5, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm0[0,1,2,3],xmm4[4,5,6,7]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [64,64]
; SSE41-NEXT: psubq %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: psrlq %xmm0, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-NEXT: psrlq %xmm0, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: por %xmm1, %xmm4
@@ -78,13 +78,13 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64]
; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4
; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1
@@ -212,15 +212,15 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
; X32-SSE-NEXT: movdqa %xmm0, %xmm3
; X32-SSE-NEXT: psllq %xmm2, %xmm3
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: psllq %xmm4, %xmm5
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [64,0,64,0]
; X32-SSE-NEXT: psubq %xmm2, %xmm3
; X32-SSE-NEXT: movdqa %xmm1, %xmm4
; X32-SSE-NEXT: psrlq %xmm3, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X32-SSE-NEXT: psrlq %xmm3, %xmm1
; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
; X32-SSE-NEXT: orpd %xmm5, %xmm1
@@ -249,7 +249,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: psrld %xmm6, %xmm3
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm1, %xmm6
; SSE2-NEXT: psrld %xmm5, %xmm6
@@ -285,7 +285,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm0[2,3,3,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: psrld %xmm4, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm6 = xmm4[2,3,3,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm7
; SSE41-NEXT: psrld %xmm6, %xmm7
@@ -465,7 +465,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; X32-SSE-NEXT: movdqa %xmm1, %xmm3
; X32-SSE-NEXT: psrld %xmm6, %xmm3
; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; X32-SSE-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE-NEXT: movdqa %xmm1, %xmm6
; X32-SSE-NEXT: psrld %xmm5, %xmm6
@@ -1366,7 +1366,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; X32-SSE-NEXT: psubq %xmm3, %xmm4
; X32-SSE-NEXT: movdqa %xmm1, %xmm3
; X32-SSE-NEXT: psrlq %xmm4, %xmm3
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; X32-SSE-NEXT: psrlq %xmm4, %xmm1
; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
; X32-SSE-NEXT: movdqa %xmm0, %xmm3
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -26,24 +26,24 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpsllq %xmm4, %xmm3, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm6, %xmm0, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [64,64]
; AVX1-NEXT: vpsubq %xmm4, %xmm8, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
; AVX1-NEXT: vpsrlq %xmm6, %xmm7, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vpsubq %xmm2, %xmm8, %xmm6
; AVX1-NEXT: vpsrlq %xmm6, %xmm1, %xmm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm7[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -31,14 +31,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: psllq %xmm1, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: psllq %xmm1, %xmm5
; SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE2-NEXT: psrlq %xmm2, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: orpd %xmm5, %xmm0
@@ -52,14 +52,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psllq %xmm1, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: psllq %xmm1, %xmm5
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0,1,2,3],xmm5[4,5,6,7]
; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlq %xmm3, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT: psrlq %xmm2, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: por %xmm5, %xmm0
@@ -70,14 +70,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
@@ -136,14 +136,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; X32-SSE-NEXT: pand %xmm2, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psllq %xmm1, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: psllq %xmm1, %xmm5
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X32-SSE-NEXT: pand %xmm2, %xmm3
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
; X32-SSE-NEXT: psrlq %xmm3, %xmm1
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X32-SSE-NEXT: psrlq %xmm2, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-SSE-NEXT: orpd %xmm5, %xmm0
@@ -745,14 +745,14 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; X32-SSE-NEXT: pand %xmm2, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psllq %xmm1, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: psllq %xmm1, %xmm5
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X32-SSE-NEXT: pand %xmm2, %xmm3
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
; X32-SSE-NEXT: psrlq %xmm3, %xmm1
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X32-SSE-NEXT: psrlq %xmm2, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-SSE-NEXT: orpd %xmm5, %xmm0
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -24,11 +24,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
@@ -38,13 +38,13 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63]
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm7[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -520,7 +520,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlq %xmm2, %xmm4, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm6, %xmm4, %xmm7
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm2
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -30,15 +30,15 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: psrlq %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: psrlq %xmm4, %xmm5
; SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [64,64]
; SSE2-NEXT: psubq %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: psllq %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; SSE2-NEXT: psllq %xmm3, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; SSE2-NEXT: orpd %xmm5, %xmm0
@@ -58,15 +58,15 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlq %xmm2, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: psrlq %xmm4, %xmm5
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm0[0,1,2,3],xmm5[4,5,6,7]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [64,64]
; SSE41-NEXT: psubq %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: psllq %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-NEXT: psllq %xmm0, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT: por %xmm5, %xmm3
@@ -80,13 +80,13 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpsrlq %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [64,64]
; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm4
; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm5[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
@@ -215,15 +215,15 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
; X32-SSE-NEXT: movdqa %xmm1, %xmm3
; X32-SSE-NEXT: psrlq %xmm2, %xmm3
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; X32-SSE-NEXT: movdqa %xmm1, %xmm5
; X32-SSE-NEXT: psrlq %xmm4, %xmm5
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [64,0,64,0]
; X32-SSE-NEXT: psubq %xmm2, %xmm3
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psllq %xmm3, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X32-SSE-NEXT: psllq %xmm3, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X32-SSE-NEXT: orpd %xmm5, %xmm0
@@ -251,7 +251,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: psrld %xmm5, %xmm3
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm1, %xmm6
; SSE2-NEXT: psrld %xmm5, %xmm6
@@ -287,7 +287,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[2,3,3,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: psrld %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm5 = xmm0[2,3,3,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm6
; SSE41-NEXT: psrld %xmm5, %xmm6
@@ -469,7 +469,7 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; X32-SSE-NEXT: movdqa %xmm1, %xmm3
; X32-SSE-NEXT: psrld %xmm5, %xmm3
; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,2,3]
; X32-SSE-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE-NEXT: movdqa %xmm1, %xmm6
; X32-SSE-NEXT: psrld %xmm5, %xmm6
@@ -1380,7 +1380,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; X32-SSE-NEXT: psubq %xmm4, %xmm5
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psllq %xmm5, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; X32-SSE-NEXT: psllq %xmm5, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X32-SSE-NEXT: movdqa %xmm1, %xmm4
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -26,24 +26,24 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpsrlq %xmm4, %xmm3, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpsrlq %xmm2, %xmm1, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm6, %xmm1, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [64,64]
; AVX1-NEXT: vpsubq %xmm4, %xmm8, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
; AVX1-NEXT: vpsllq %xmm6, %xmm7, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
; AVX1-NEXT: vpsubq %xmm2, %xmm8, %xmm6
; AVX1-NEXT: vpsllq %xmm6, %xmm0, %xmm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm7[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -31,14 +31,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: psrlq %xmm1, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: psrlq %xmm1, %xmm5
; SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psllq %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE2-NEXT: psllq %xmm2, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: orpd %xmm5, %xmm0
@@ -52,14 +52,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psrlq %xmm1, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: psrlq %xmm1, %xmm5
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm4[0,1,2,3],xmm5[4,5,6,7]
; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psllq %xmm3, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT: psllq %xmm2, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: por %xmm5, %xmm0
@@ -70,14 +70,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
@@ -138,14 +138,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; X32-SSE-NEXT: pand %xmm2, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psrlq %xmm1, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: psrlq %xmm1, %xmm5
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X32-SSE-NEXT: pand %xmm2, %xmm3
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
; X32-SSE-NEXT: psllq %xmm3, %xmm1
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X32-SSE-NEXT: psllq %xmm2, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-SSE-NEXT: orpd %xmm5, %xmm0
@@ -789,14 +789,14 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; X32-SSE-NEXT: pand %xmm2, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: psrlq %xmm1, %xmm4
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: psrlq %xmm1, %xmm5
; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X32-SSE-NEXT: pand %xmm2, %xmm3
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
; X32-SSE-NEXT: psllq %xmm3, %xmm1
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X32-SSE-NEXT: psllq %xmm2, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-SSE-NEXT: orpd %xmm5, %xmm0
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -24,11 +24,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
@@ -38,13 +38,13 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63]
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm7
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm7[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -566,7 +566,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsllq %xmm2, %xmm4, %xmm5
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm6, %xmm4, %xmm7
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm2