50 changes: 22 additions & 28 deletions llvm/test/CodeGen/X86/sse-minmax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ define double @ole(double %x, double %y) {
; STRICT-NEXT: cmplesd %xmm1, %xmm2
; STRICT-NEXT: andpd %xmm2, %xmm0
; STRICT-NEXT: andnpd %xmm1, %xmm2
; STRICT-NEXT: orpd %xmm0, %xmm2
; STRICT-NEXT: movapd %xmm2, %xmm0
; STRICT-NEXT: orpd %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ole:
Expand Down Expand Up @@ -261,11 +260,10 @@ define double @oge_x(double %x) {
define double @ole_x(double %x) {
; STRICT-LABEL: ole_x:
; STRICT: # %bb.0:
; STRICT-NEXT: xorpd %xmm2, %xmm2
; STRICT-NEXT: movapd %xmm0, %xmm1
; STRICT-NEXT: cmplesd %xmm2, %xmm1
; STRICT-NEXT: andpd %xmm0, %xmm1
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: xorpd %xmm1, %xmm1
; STRICT-NEXT: movapd %xmm0, %xmm2
; STRICT-NEXT: cmplesd %xmm1, %xmm2
; STRICT-NEXT: andpd %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ole_x:
Expand Down Expand Up @@ -338,8 +336,7 @@ define double @ugt(double %x, double %y) {
; STRICT-NEXT: cmpnlesd %xmm1, %xmm2
; STRICT-NEXT: andpd %xmm2, %xmm0
; STRICT-NEXT: andnpd %xmm1, %xmm2
; STRICT-NEXT: orpd %xmm0, %xmm2
; STRICT-NEXT: movapd %xmm2, %xmm0
; STRICT-NEXT: orpd %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ugt:
Expand Down Expand Up @@ -499,11 +496,10 @@ define double @ule_inverse(double %x, double %y) {
define double @ugt_x(double %x) {
; STRICT-LABEL: ugt_x:
; STRICT: # %bb.0:
; STRICT-NEXT: xorpd %xmm2, %xmm2
; STRICT-NEXT: movapd %xmm0, %xmm1
; STRICT-NEXT: cmpnlesd %xmm2, %xmm1
; STRICT-NEXT: andpd %xmm0, %xmm1
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: xorpd %xmm1, %xmm1
; STRICT-NEXT: movapd %xmm0, %xmm2
; STRICT-NEXT: cmpnlesd %xmm1, %xmm2
; STRICT-NEXT: andpd %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ugt_x:
Expand Down Expand Up @@ -762,13 +758,12 @@ define double @oge_y(double %x) {
define double @ole_y(double %x) {
; STRICT-LABEL: ole_y:
; STRICT: # %bb.0:
; STRICT-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; STRICT-NEXT: movapd %xmm0, %xmm1
; STRICT-NEXT: cmplesd %xmm2, %xmm1
; STRICT-NEXT: andpd %xmm1, %xmm0
; STRICT-NEXT: andnpd %xmm2, %xmm1
; STRICT-NEXT: orpd %xmm0, %xmm1
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; STRICT-NEXT: movapd %xmm0, %xmm2
; STRICT-NEXT: cmplesd %xmm1, %xmm2
; STRICT-NEXT: andpd %xmm2, %xmm0
; STRICT-NEXT: andnpd %xmm1, %xmm2
; STRICT-NEXT: orpd %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ole_y:
Expand Down Expand Up @@ -839,13 +834,12 @@ define double @ole_inverse_y(double %x) {
define double @ugt_y(double %x) {
; STRICT-LABEL: ugt_y:
; STRICT: # %bb.0:
; STRICT-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; STRICT-NEXT: movapd %xmm0, %xmm1
; STRICT-NEXT: cmpnlesd %xmm2, %xmm1
; STRICT-NEXT: andpd %xmm1, %xmm0
; STRICT-NEXT: andnpd %xmm2, %xmm1
; STRICT-NEXT: orpd %xmm0, %xmm1
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; STRICT-NEXT: movapd %xmm0, %xmm2
; STRICT-NEXT: cmpnlesd %xmm1, %xmm2
; STRICT-NEXT: andpd %xmm2, %xmm0
; STRICT-NEXT: andnpd %xmm1, %xmm2
; STRICT-NEXT: orpd %xmm2, %xmm0
; STRICT-NEXT: retq
;
; RELAX-LABEL: ugt_y:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/sshl_sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -205,18 +205,18 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-LABEL: func5:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rcx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testq %rdi, %rdi
; X64-NEXT: sets %dl
; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: addq %rdx, %rax
; X64-NEXT: movq %rdi, %rdx
; X64-NEXT: shlq %cl, %rdx
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: sets %al
; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: addq %rax, %rdx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shlq %cl, %rax
; X64-NEXT: movq %rax, %rsi
; X64-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NEXT: sarq %cl, %rsi
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: cmoveq %rdx, %rax
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
;
; X86-LABEL: func5:
Expand Down
21 changes: 10 additions & 11 deletions llvm/test/CodeGen/X86/ssub_sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -207,19 +207,18 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; X64-LABEL: vec:
; X64: # %bb.0:
; X64-NEXT: pxor %xmm3, %xmm3
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: psubd %xmm1, %xmm2
; X64-NEXT: pcmpgtd %xmm3, %xmm1
; X64-NEXT: pcmpgtd %xmm2, %xmm0
; X64-NEXT: pxor %xmm2, %xmm2
; X64-NEXT: movdqa %xmm0, %xmm3
; X64-NEXT: psubd %xmm1, %xmm3
; X64-NEXT: pcmpgtd %xmm2, %xmm1
; X64-NEXT: pcmpgtd %xmm3, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: pandn %xmm2, %xmm1
; X64-NEXT: psrad $31, %xmm2
; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; X64-NEXT: pand %xmm0, %xmm2
; X64-NEXT: por %xmm1, %xmm2
; X64-NEXT: movdqa %xmm2, %xmm0
; X64-NEXT: pandn %xmm3, %xmm1
; X64-NEXT: psrad $31, %xmm3
; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; X64-NEXT: pand %xmm3, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: retq
%tmp = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %tmp
Expand Down
288 changes: 138 additions & 150 deletions llvm/test/CodeGen/X86/ssub_sat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -612,36 +612,34 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; SSE2-LABEL: v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pandn %xmm2, %xmm1
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm3, %xmm1
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: v2i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pxor %xmm3, %xmm3
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: psubd %xmm1, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: psubd %xmm1, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pandn %xmm2, %xmm1
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSSE3-NEXT: pand %xmm0, %xmm2
; SSSE3-NEXT: por %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: pandn %xmm3, %xmm1
; SSSE3-NEXT: psrad $31, %xmm3
; SSSE3-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT: pand %xmm3, %xmm0
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: v2i32:
Expand Down Expand Up @@ -715,36 +713,34 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE2-LABEL: v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pandn %xmm2, %xmm1
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm3, %xmm1
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: v4i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pxor %xmm3, %xmm3
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: psubd %xmm1, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: psubd %xmm1, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pandn %xmm2, %xmm1
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSSE3-NEXT: pand %xmm0, %xmm2
; SSSE3-NEXT: por %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: pandn %xmm3, %xmm1
; SSSE3-NEXT: psrad $31, %xmm3
; SSSE3-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT: pand %xmm3, %xmm0
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: v4i32:
Expand Down Expand Up @@ -818,60 +814,58 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; SSE2-LABEL: v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: pcmpgtd %xmm5, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm2
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: psubd %xmm2, %xmm5
; SSE2-NEXT: pcmpgtd %xmm4, %xmm2
; SSE2-NEXT: pcmpgtd %xmm5, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pandn %xmm5, %xmm2
; SSE2-NEXT: psrad $31, %xmm5
; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm6, %xmm0
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm6, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psubd %xmm3, %xmm2
; SSE2-NEXT: pcmpgtd %xmm5, %xmm3
; SSE2-NEXT: pcmpgtd %xmm4, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pandn %xmm2, %xmm3
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm6, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: v8i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSSE3-NEXT: pxor %xmm5, %xmm5
; SSSE3-NEXT: psubd %xmm2, %xmm0
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
; SSSE3-NEXT: pxor %xmm2, %xmm4
; SSSE3-NEXT: movdqa %xmm4, %xmm2
; SSSE3-NEXT: pandn %xmm0, %xmm2
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: pxor %xmm4, %xmm4
; SSSE3-NEXT: movdqa %xmm0, %xmm5
; SSSE3-NEXT: psubd %xmm2, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm0
; SSSE3-NEXT: pxor %xmm2, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pandn %xmm5, %xmm2
; SSSE3-NEXT: psrad $31, %xmm5
; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
; SSSE3-NEXT: pxor %xmm6, %xmm0
; SSSE3-NEXT: pand %xmm4, %xmm0
; SSSE3-NEXT: pxor %xmm6, %xmm5
; SSSE3-NEXT: pand %xmm5, %xmm0
; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: psubd %xmm3, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
; SSSE3-NEXT: pxor %xmm3, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm3
; SSSE3-NEXT: pandn %xmm2, %xmm3
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pxor %xmm6, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: por %xmm3, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSSE3-NEXT: pand %xmm2, %xmm1
; SSSE3-NEXT: por %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: v8i32:
Expand Down Expand Up @@ -967,108 +961,102 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
; SSE2-LABEL: v16i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm8
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm9, %xmm9
; SSE2-NEXT: psubd %xmm4, %xmm0
; SSE2-NEXT: pcmpgtd %xmm9, %xmm4
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm4
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm8
; SSE2-NEXT: movdqa %xmm0, %xmm9
; SSE2-NEXT: psubd %xmm4, %xmm9
; SSE2-NEXT: pcmpgtd %xmm8, %xmm4
; SSE2-NEXT: pcmpgtd %xmm9, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pandn %xmm9, %xmm4
; SSE2-NEXT: psrad $31, %xmm9
; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm10, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm10, %xmm9
; SSE2-NEXT: pand %xmm9, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm8, %xmm1
; SSE2-NEXT: psubd %xmm5, %xmm1
; SSE2-NEXT: pcmpgtd %xmm9, %xmm5
; SSE2-NEXT: pcmpgtd %xmm1, %xmm8
; SSE2-NEXT: pxor %xmm5, %xmm8
; SSE2-NEXT: movdqa %xmm8, %xmm4
; SSE2-NEXT: pandn %xmm1, %xmm4
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm10, %xmm1
; SSE2-NEXT: pand %xmm8, %xmm1
; SSE2-NEXT: por %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psubd %xmm5, %xmm4
; SSE2-NEXT: pcmpgtd %xmm8, %xmm5
; SSE2-NEXT: pcmpgtd %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm5, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: pandn %xmm4, %xmm5
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm10, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psubd %xmm6, %xmm4
; SSE2-NEXT: pcmpgtd %xmm9, %xmm6
; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
; SSE2-NEXT: pcmpgtd %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm6, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm5
; SSE2-NEXT: pandn %xmm4, %xmm5
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm10, %xmm4
; SSE2-NEXT: pand %xmm2, %xmm4
; SSE2-NEXT: por %xmm5, %xmm4
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: psubd %xmm7, %xmm5
; SSE2-NEXT: pcmpgtd %xmm9, %xmm7
; SSE2-NEXT: pcmpgtd %xmm5, %xmm3
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: por %xmm5, %xmm2
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psubd %xmm7, %xmm4
; SSE2-NEXT: pcmpgtd %xmm8, %xmm7
; SSE2-NEXT: pcmpgtd %xmm4, %xmm3
; SSE2-NEXT: pxor %xmm7, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: pandn %xmm5, %xmm2
; SSE2-NEXT: psrad $31, %xmm5
; SSE2-NEXT: pxor %xmm10, %xmm5
; SSE2-NEXT: pand %xmm3, %xmm5
; SSE2-NEXT: por %xmm2, %xmm5
; SSE2-NEXT: movdqa %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm5, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pandn %xmm4, %xmm5
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm10, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: por %xmm5, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: v16i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa %xmm1, %xmm8
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm9, %xmm9
; SSSE3-NEXT: psubd %xmm4, %xmm0
; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm4, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: pandn %xmm0, %xmm4
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: pxor %xmm8, %xmm8
; SSSE3-NEXT: movdqa %xmm0, %xmm9
; SSSE3-NEXT: psubd %xmm4, %xmm9
; SSSE3-NEXT: pcmpgtd %xmm8, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm9, %xmm0
; SSSE3-NEXT: pxor %xmm4, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSSE3-NEXT: pandn %xmm9, %xmm4
; SSSE3-NEXT: psrad $31, %xmm9
; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648,2147483648,2147483648]
; SSSE3-NEXT: pxor %xmm10, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pxor %xmm10, %xmm9
; SSSE3-NEXT: pand %xmm9, %xmm0
; SSSE3-NEXT: por %xmm4, %xmm0
; SSSE3-NEXT: movdqa %xmm8, %xmm1
; SSSE3-NEXT: psubd %xmm5, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm8
; SSSE3-NEXT: pxor %xmm5, %xmm8
; SSSE3-NEXT: movdqa %xmm8, %xmm4
; SSSE3-NEXT: pandn %xmm1, %xmm4
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pxor %xmm10, %xmm1
; SSSE3-NEXT: pand %xmm8, %xmm1
; SSSE3-NEXT: por %xmm4, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: psubd %xmm5, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm8, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm1
; SSSE3-NEXT: pxor %xmm5, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm5
; SSSE3-NEXT: pandn %xmm4, %xmm5
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm10, %xmm4
; SSSE3-NEXT: pand %xmm4, %xmm1
; SSSE3-NEXT: por %xmm5, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm4
; SSSE3-NEXT: psubd %xmm6, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm9, %xmm6
; SSSE3-NEXT: pcmpgtd %xmm8, %xmm6
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm2
; SSSE3-NEXT: pxor %xmm6, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm5
; SSSE3-NEXT: pandn %xmm4, %xmm5
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm10, %xmm4
; SSSE3-NEXT: pand %xmm2, %xmm4
; SSSE3-NEXT: por %xmm5, %xmm4
; SSSE3-NEXT: movdqa %xmm3, %xmm5
; SSSE3-NEXT: psubd %xmm7, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm3
; SSSE3-NEXT: pand %xmm4, %xmm2
; SSSE3-NEXT: por %xmm5, %xmm2
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: psubd %xmm7, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm8, %xmm7
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm3
; SSSE3-NEXT: pxor %xmm7, %xmm3
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: pandn %xmm5, %xmm2
; SSSE3-NEXT: psrad $31, %xmm5
; SSSE3-NEXT: pxor %xmm10, %xmm5
; SSSE3-NEXT: pand %xmm3, %xmm5
; SSSE3-NEXT: por %xmm2, %xmm5
; SSSE3-NEXT: movdqa %xmm4, %xmm2
; SSSE3-NEXT: movdqa %xmm5, %xmm3
; SSSE3-NEXT: movdqa %xmm3, %xmm5
; SSSE3-NEXT: pandn %xmm4, %xmm5
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm10, %xmm4
; SSSE3-NEXT: pand %xmm4, %xmm3
; SSSE3-NEXT: por %xmm5, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: v16i32:
Expand Down
12 changes: 8 additions & 4 deletions llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,8 @@ define <8 x double> @stack_fold_cmppd_mask(<8 x double> %a0, <8 x double> %a1, <
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kandb %k0, %k1, %k1
; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vblendmpd (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovupd (%rsp), %zmm1 # 64-byte Reload
; CHECK-NEXT: vmovapd %zmm1, %zmm0 {%k1}
; CHECK-NEXT: addq $136, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Expand Down Expand Up @@ -369,7 +370,8 @@ define <8 x double> @stack_fold_cmppd_mask_commuted(<8 x double> %a0, <8 x doubl
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kandb %k0, %k1, %k1
; CHECK-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vblendmpd (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovupd (%rsp), %zmm1 # 64-byte Reload
; CHECK-NEXT: vmovapd %zmm1, %zmm0 {%k1}
; CHECK-NEXT: addq $136, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Expand Down Expand Up @@ -421,7 +423,8 @@ define <16 x float> @stack_fold_cmpps_mask(<16 x float> %a0, <16 x float> %a1, <
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kandw %k0, %k1, %k1
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vblendmps (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovups (%rsp), %zmm1 # 64-byte Reload
; CHECK-NEXT: vmovaps %zmm1, %zmm0 {%k1}
; CHECK-NEXT: addq $136, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Expand Down Expand Up @@ -454,7 +457,8 @@ define <16 x float> @stack_fold_cmpps_mask_commuted(<16 x float> %a0, <16 x floa
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kandw %k0, %k1, %k1
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vblendmps (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovups (%rsp), %zmm1 # 64-byte Reload
; CHECK-NEXT: vmovaps %zmm1, %zmm0 {%k1}
; CHECK-NEXT: addq $136, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Expand Down
9 changes: 6 additions & 3 deletions llvm/test/CodeGen/X86/stack-folding-int-avx512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2037,7 +2037,8 @@ define <16 x i32> @stack_fold_pcmpeqd_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k1 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vpblendmd (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqu64 (%rsp), %zmm1 # 64-byte Reload
; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; CHECK-NEXT: addq $136, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Expand Down Expand Up @@ -2069,7 +2070,8 @@ define <16 x i32> @stack_fold_pcmpeqd_mask_commuted(<16 x i32> %a0, <16 x i32> %
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k1 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vpblendmd (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqu64 (%rsp), %zmm1 # 64-byte Reload
; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; CHECK-NEXT: addq $136, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Expand Down Expand Up @@ -2101,7 +2103,8 @@ define <16 x i32> @stack_fold_pcmpled_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcmpled {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k1 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT: vpblendmd (%rsp), %zmm0, %zmm0 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqu64 (%rsp), %zmm1 # 64-byte Reload
; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; CHECK-NEXT: addq $136, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
Expand Down
32 changes: 12 additions & 20 deletions llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,9 @@ define i64 @test__blci_u64(i64 %a0) {
define i64 @test__blcic_u64(i64 %a0) {
; X64-LABEL: test__blcic_u64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: xorq $-1, %rcx
; X64-NEXT: addq $1, %rax
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: leaq 1(%rdi), %rax
; X64-NEXT: xorq $-1, %rdi
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
%1 = xor i64 %a0, -1
%2 = add i64 %a0, 1
Expand Down Expand Up @@ -87,11 +85,9 @@ define i64 @test__blsfill_u64(i64 %a0) {
define i64 @test__blsic_u64(i64 %a0) {
; X64-LABEL: test__blsic_u64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: xorq $-1, %rcx
; X64-NEXT: subq $1, %rax
; X64-NEXT: orq %rcx, %rax
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: xorq $-1, %rdi
; X64-NEXT: orq %rdi, %rax
; X64-NEXT: retq
%1 = xor i64 %a0, -1
%2 = sub i64 %a0, 1
Expand All @@ -102,11 +98,9 @@ define i64 @test__blsic_u64(i64 %a0) {
define i64 @test__t1mskc_u64(i64 %a0) {
; X64-LABEL: test__t1mskc_u64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: xorq $-1, %rcx
; X64-NEXT: addq $1, %rax
; X64-NEXT: orq %rcx, %rax
; X64-NEXT: leaq 1(%rdi), %rax
; X64-NEXT: xorq $-1, %rdi
; X64-NEXT: orq %rdi, %rax
; X64-NEXT: retq
%1 = xor i64 %a0, -1
%2 = add i64 %a0, 1
Expand All @@ -117,11 +111,9 @@ define i64 @test__t1mskc_u64(i64 %a0) {
define i64 @test__tzmsk_u64(i64 %a0) {
; X64-LABEL: test__tzmsk_u64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: xorq $-1, %rcx
; X64-NEXT: subq $1, %rax
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: xorq $-1, %rdi
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
%1 = xor i64 %a0, -1
%2 = sub i64 %a0, 1
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ define i32 @test__blcic_u32(i32 %a0) {
;
; X64-LABEL: test__blcic_u32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: xorl $-1, %ecx
; X64-NEXT: addl $1, %eax
; X64-NEXT: andl %ecx, %eax
; X64-NEXT: retq
%1 = xor i32 %a0, -1
Expand Down Expand Up @@ -152,10 +152,10 @@ define i32 @test__blsic_u32(i32 %a0) {
;
; X64-LABEL: test__blsic_u32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: xorl $-1, %ecx
; X64-NEXT: subl $1, %eax
; X64-NEXT: orl %ecx, %eax
; X64-NEXT: retq
%1 = xor i32 %a0, -1
Expand All @@ -176,10 +176,10 @@ define i32 @test__t1mskc_u32(i32 %a0) {
;
; X64-LABEL: test__t1mskc_u32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: xorl $-1, %ecx
; X64-NEXT: addl $1, %eax
; X64-NEXT: orl %ecx, %eax
; X64-NEXT: retq
%1 = xor i32 %a0, -1
Expand All @@ -200,10 +200,10 @@ define i32 @test__tzmsk_u32(i32 %a0) {
;
; X64-LABEL: test__tzmsk_u32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: xorl $-1, %ecx
; X64-NEXT: subl $1, %eax
; X64-NEXT: andl %ecx, %eax
; X64-NEXT: retq
%1 = xor i32 %a0, -1
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/udiv_fix_sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -172,20 +172,20 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: leaq (%rdi,%rdi), %rsi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $63, %rax
; X64-NEXT: shrdq $33, %rax, %rsi
; X64-NEXT: leaq (%rdi,%rdi), %rax
; X64-NEXT: movq %rdi, %rsi
; X64-NEXT: shrq $63, %rsi
; X64-NEXT: shldq $31, %rax, %rsi
; X64-NEXT: shlq $32, %rdi
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: callq __udivti3@PLT
; X64-NEXT: cmpq $2, %rdx
; X64-NEXT: movq $-1, %rcx
; X64-NEXT: cmovbq %rax, %rcx
; X64-NEXT: cmovaeq %rcx, %rax
; X64-NEXT: cmpq $1, %rdx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovbq %rdx, %rax
; X64-NEXT: shldq $63, %rcx, %rax
; X64-NEXT: movl $1, %ecx
; X64-NEXT: cmovbq %rdx, %rcx
; X64-NEXT: shrdq $1, %rcx, %rax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
;
Expand Down
41 changes: 20 additions & 21 deletions llvm/test/CodeGen/X86/umax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,12 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind {
define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: test_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: cmpq %rdx, %rdi
; X64-NEXT: cmovaq %rdi, %rdx
; X64-NEXT: cmpq %rcx, %rsi
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: cmovaq %rdi, %rax
; X64-NEXT: cmoveq %rdx, %rax
; X64-NEXT: cmpq %rcx, %rsi
; X64-NEXT: cmovaq %rdi, %rdx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: cmovaq %rsi, %rcx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: retq
Expand Down Expand Up @@ -358,23 +358,22 @@ define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; SSE-LABEL: test_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: movdqa %xmm2, %xmm6
; SSE-NEXT: pxor %xmm5, %xmm6
; SSE-NEXT: movdqa %xmm0, %xmm4
; SSE-NEXT: pxor %xmm5, %xmm4
; SSE-NEXT: pcmpgtd %xmm6, %xmm4
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm4
; SSE-NEXT: por %xmm0, %xmm4
; SSE-NEXT: movdqa %xmm3, %xmm0
; SSE-NEXT: pxor %xmm5, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm5
; SSE-NEXT: pcmpgtd %xmm0, %xmm5
; SSE-NEXT: pand %xmm5, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm5
; SSE-NEXT: por %xmm5, %xmm1
; SSE-NEXT: movdqa %xmm4, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: movdqa %xmm2, %xmm5
; SSE-NEXT: pxor %xmm4, %xmm5
; SSE-NEXT: movdqa %xmm0, %xmm6
; SSE-NEXT: pxor %xmm4, %xmm6
; SSE-NEXT: pcmpgtd %xmm5, %xmm6
; SSE-NEXT: pand %xmm6, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm6
; SSE-NEXT: por %xmm6, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm2
; SSE-NEXT: pxor %xmm4, %xmm2
; SSE-NEXT: pxor %xmm1, %xmm4
; SSE-NEXT: pcmpgtd %xmm2, %xmm4
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm4
; SSE-NEXT: por %xmm4, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i32:
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/umin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,12 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind {
define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: test_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: cmpq %rdx, %rdi
; X64-NEXT: cmovbq %rdi, %rdx
; X64-NEXT: cmpq %rcx, %rsi
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: cmovbq %rdi, %rax
; X64-NEXT: cmoveq %rdx, %rax
; X64-NEXT: cmpq %rcx, %rsi
; X64-NEXT: cmovbq %rdi, %rdx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: cmovbq %rsi, %rcx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: retq
Expand Down
11 changes: 5 additions & 6 deletions llvm/test/CodeGen/X86/umul_fix.ll
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: adcl %edi, %edx
; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: shldl $30, %eax, %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: shldl $30, %eax, %edx
; X86-NEXT: shldl $30, %esi, %eax
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
Expand Down Expand Up @@ -373,11 +372,11 @@ define i64 @func9(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: adcl %edx, %ecx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: addl %edx, %ebx
; X86-NEXT: addl %ebx, %ecx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl %edi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
Expand Down
72 changes: 35 additions & 37 deletions llvm/test/CodeGen/X86/umul_fix_sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -282,22 +282,21 @@ define i64 @func5(i64 %x, i64 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: testl %esi, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: testl %edi, %edi
; X86-NEXT: setne %dl
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %bl
; X86-NEXT: andb %dl, %bl
; X86-NEXT: mull %ebp
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %eax, %esi
; X86-NEXT: seto %bh
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: seto %cl
; X86-NEXT: orb %bh, %cl
; X86-NEXT: addl %edi, %esi
; X86-NEXT: addl %eax, %esi
; X86-NEXT: movl %edx, %eax
; X86-NEXT: mull %ebp
; X86-NEXT: addl %esi, %edx
Expand Down Expand Up @@ -445,30 +444,30 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %ebx, %ebp
; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %edi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: mull %edi
; X86-NEXT: addl %esi, %eax
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: addl %ebx, %edx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: addl %ebp, %edx
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl $1, %edi
; X86-NEXT: cmpl $1, %ebx
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: notl %ecx
; X86-NEXT: orl %ecx, %eax
Expand Down Expand Up @@ -501,26 +500,26 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edi
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %ebx, %ebp
; X86-NEXT: mull %ebx
; X86-NEXT: addl %edx, %edi
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edi
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: mull %ebx
; X86-NEXT: addl %edi, %eax
; X86-NEXT: adcl %esi, %edx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: addl %ebx, %edx
; X86-NEXT: addl %ebp, %edx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: shrdl $31, %edx, %eax
; X86-NEXT: movl %edx, %esi
Expand All @@ -530,9 +529,8 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: notl %edi
; X86-NEXT: orl %edi, %eax
; X86-NEXT: shldl $1, %edx, %ecx
; X86-NEXT: orl %edi, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: shrdl $31, %ecx, %edx
; X86-NEXT: orl %edi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X64-NEXT: seto %r10b
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: seto %r11b
; X64-NEXT: orb %r10b, %r11b
; X64-NEXT: addq %rsi, %rcx
; X64-NEXT: addq %rax, %rsi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %r8
; X64-NEXT: addq %rcx, %rdx
; X64-NEXT: addq %rsi, %rdx
; X64-NEXT: setb %cl
; X64-NEXT: orb %r11b, %cl
; X64-NEXT: orb %r9b, %cl
Expand Down
11 changes: 5 additions & 6 deletions llvm/test/CodeGen/X86/umulo-64-legalisation-lowering.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,21 @@ define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: testl %esi, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: testl %edi, %edi
; X86-NEXT: setne %dl
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %bl
; X86-NEXT: andb %dl, %bl
; X86-NEXT: mull %ebp
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %eax, %esi
; X86-NEXT: seto %bh
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: seto %ch
; X86-NEXT: orb %bh, %ch
; X86-NEXT: addl %edi, %esi
; X86-NEXT: addl %eax, %esi
; X86-NEXT: movl %edx, %eax
; X86-NEXT: mull %ebp
; X86-NEXT: addl %esi, %edx
Expand Down
61 changes: 30 additions & 31 deletions llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -545,18 +545,18 @@ define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b)
define i32 @out_constant_varx_mone(i32 %x, i32 %y, i32 %mask) {
; CHECK-NOBMI-LABEL: out_constant_varx_mone:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: movl %edi, %eax
; CHECK-NOBMI-NEXT: andl %edx, %eax
; CHECK-NOBMI-NEXT: notl %edx
; CHECK-NOBMI-NEXT: orl %edx, %eax
; CHECK-NOBMI-NEXT: andl %edx, %edi
; CHECK-NOBMI-NEXT: movl %edx, %eax
; CHECK-NOBMI-NEXT: notl %eax
; CHECK-NOBMI-NEXT: orl %edi, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out_constant_varx_mone:
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: movl %edi, %eax
; CHECK-BMI-NEXT: andl %edx, %eax
; CHECK-BMI-NEXT: notl %edx
; CHECK-BMI-NEXT: orl %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %edi
; CHECK-BMI-NEXT: movl %edx, %eax
; CHECK-BMI-NEXT: notl %eax
; CHECK-BMI-NEXT: orl %edi, %eax
; CHECK-BMI-NEXT: retq
%notmask = xor i32 %mask, -1
%mx = and i32 %mask, %x
Expand Down Expand Up @@ -674,11 +674,10 @@ define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
; CHECK-NOBMI-LABEL: out_constant_varx_42_invmask:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: movl %edx, %eax
; CHECK-NOBMI-NEXT: movl %edx, %ecx
; CHECK-NOBMI-NEXT: notl %ecx
; CHECK-NOBMI-NEXT: andl %edi, %ecx
; CHECK-NOBMI-NEXT: andl $42, %eax
; CHECK-NOBMI-NEXT: orl %ecx, %eax
; CHECK-NOBMI-NEXT: notl %eax
; CHECK-NOBMI-NEXT: andl %edi, %eax
; CHECK-NOBMI-NEXT: andl $42, %edx
; CHECK-NOBMI-NEXT: orl %edx, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out_constant_varx_42_invmask:
Expand Down Expand Up @@ -758,18 +757,18 @@ define i32 @in_constant_mone_vary(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
; CHECK-NOBMI-LABEL: out_constant_mone_vary_invmask:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: movl %esi, %eax
; CHECK-NOBMI-NEXT: andl %edx, %eax
; CHECK-NOBMI-NEXT: notl %edx
; CHECK-NOBMI-NEXT: orl %edx, %eax
; CHECK-NOBMI-NEXT: andl %edx, %esi
; CHECK-NOBMI-NEXT: movl %edx, %eax
; CHECK-NOBMI-NEXT: notl %eax
; CHECK-NOBMI-NEXT: orl %esi, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out_constant_mone_vary_invmask:
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: movl %esi, %eax
; CHECK-BMI-NEXT: andl %edx, %eax
; CHECK-BMI-NEXT: notl %edx
; CHECK-BMI-NEXT: orl %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %esi
; CHECK-BMI-NEXT: movl %edx, %eax
; CHECK-BMI-NEXT: notl %eax
; CHECK-BMI-NEXT: orl %esi, %eax
; CHECK-BMI-NEXT: retq
%notmask = xor i32 %mask, -1
%mx = and i32 %notmask, -1
Expand Down Expand Up @@ -846,20 +845,20 @@ define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
; CHECK-NOBMI-LABEL: out_constant_42_vary_invmask:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: movl %esi, %eax
; CHECK-NOBMI-NEXT: andl %edx, %eax
; CHECK-NOBMI-NEXT: notl %edx
; CHECK-NOBMI-NEXT: andl $42, %edx
; CHECK-NOBMI-NEXT: orl %edx, %eax
; CHECK-NOBMI-NEXT: andl %edx, %esi
; CHECK-NOBMI-NEXT: movl %edx, %eax
; CHECK-NOBMI-NEXT: notl %eax
; CHECK-NOBMI-NEXT: andl $42, %eax
; CHECK-NOBMI-NEXT: orl %esi, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out_constant_42_vary_invmask:
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: movl %esi, %eax
; CHECK-BMI-NEXT: andl %edx, %eax
; CHECK-BMI-NEXT: notl %edx
; CHECK-BMI-NEXT: andl $42, %edx
; CHECK-BMI-NEXT: orl %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %esi
; CHECK-BMI-NEXT: movl %edx, %eax
; CHECK-BMI-NEXT: notl %eax
; CHECK-BMI-NEXT: andl $42, %eax
; CHECK-BMI-NEXT: orl %esi, %eax
; CHECK-BMI-NEXT: retq
%notmask = xor i32 %mask, -1
%mx = and i32 %notmask, 42
Expand Down
21 changes: 10 additions & 11 deletions llvm/test/CodeGen/X86/urem-lkk.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,17 @@ define i32 @fold_urem_positive_even(i32 %x) {
define i32 @combine_urem_udiv(i32 %x) {
; CHECK-LABEL: combine_urem_udiv:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: imulq $1491936009, %rax, %rax # imm = 0x58ED2309
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movl %edi, %ecx
; CHECK-NEXT: subl %eax, %ecx
; CHECK-NEXT: shrl %ecx
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: shrl $6, %ecx
; CHECK-NEXT: imull $95, %ecx, %eax
; CHECK-NEXT: subl %eax, %edi
; CHECK-NEXT: leal (%rdi,%rcx), %eax
; CHECK-NEXT: imulq $1491936009, %rax, %rcx # imm = 0x58ED2309
; CHECK-NEXT: shrq $32, %rcx
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %ecx, %eax
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: shrl $6, %eax
; CHECK-NEXT: imull $95, %eax, %ecx
; CHECK-NEXT: subl %ecx, %edi
; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: retq
%1 = urem i32 %x, 95
%2 = udiv i32 %x, 95
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,9 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295,4294967295,1431655764]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; CHECK-SSE41-NEXT: pxor %xmm0, %xmm0
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: t32_tautological:
Expand Down
34 changes: 16 additions & 18 deletions llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3037,30 +3037,28 @@ define <4 x i32> @strict_vector_fptosi_v4f32_to_v4i32(<4 x float> %a) #0 {
define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; SSE-32: # %bb.0:
; SSE-32-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-32-NEXT: movaps %xmm0, %xmm3
; SSE-32-NEXT: cmpltps %xmm2, %xmm3
; SSE-32-NEXT: movaps %xmm3, %xmm1
; SSE-32-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; SSE-32-NEXT: andnps %xmm2, %xmm3
; SSE-32-NEXT: subps %xmm3, %xmm0
; SSE-32-NEXT: movaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-32-NEXT: movaps %xmm0, %xmm2
; SSE-32-NEXT: cmpltps %xmm1, %xmm2
; SSE-32-NEXT: movaps %xmm2, %xmm3
; SSE-32-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
; SSE-32-NEXT: andnps %xmm1, %xmm2
; SSE-32-NEXT: subps %xmm2, %xmm0
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT: xorps %xmm0, %xmm1
; SSE-32-NEXT: movaps %xmm1, %xmm0
; SSE-32-NEXT: xorps %xmm3, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-64-NEXT: movaps %xmm0, %xmm3
; SSE-64-NEXT: cmpltps %xmm2, %xmm3
; SSE-64-NEXT: movaps %xmm3, %xmm1
; SSE-64-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-64-NEXT: andnps %xmm2, %xmm3
; SSE-64-NEXT: subps %xmm3, %xmm0
; SSE-64-NEXT: movaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-64-NEXT: movaps %xmm0, %xmm2
; SSE-64-NEXT: cmpltps %xmm1, %xmm2
; SSE-64-NEXT: movaps %xmm2, %xmm3
; SSE-64-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE-64-NEXT: andnps %xmm1, %xmm2
; SSE-64-NEXT: subps %xmm2, %xmm0
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT: xorps %xmm0, %xmm1
; SSE-64-NEXT: movaps %xmm1, %xmm0
; SSE-64-NEXT: xorps %xmm3, %xmm0
; SSE-64-NEXT: retq
;
; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
Expand Down
18 changes: 8 additions & 10 deletions llvm/test/CodeGen/X86/vec_ctbits.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,10 @@ define <2 x i64> @footz(<2 x i64> %a) nounwind {
; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $4, %xmm1
; CHECK-NEXT: paddb %xmm0, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: psadbw %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: paddb %xmm1, %xmm0
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: retq
%c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
Expand Down Expand Up @@ -93,11 +92,10 @@ define <2 x i64> @foopop(<2 x i64> %a) nounwind {
; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $4, %xmm1
; CHECK-NEXT: paddb %xmm0, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: psadbw %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: paddb %xmm1, %xmm0
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: retq
%c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
ret <2 x i64> %c
Expand Down
72 changes: 30 additions & 42 deletions llvm/test/CodeGen/X86/vec_minmax_sint.ll
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,16 @@ define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm2, %xmm6
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
; SSE41-NEXT: movdqa %xmm4, %xmm6
; SSE41-NEXT: pxor %xmm5, %xmm6
; SSE41-NEXT: movdqa %xmm0, %xmm7
; SSE41-NEXT: pxor %xmm5, %xmm7
; SSE41-NEXT: movdqa %xmm7, %xmm0
; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
; SSE41-NEXT: pand %xmm6, %xmm0
; SSE41-NEXT: por %xmm7, %xmm0
; SSE41-NEXT: movdqa %xmm6, %xmm7
; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT: pand %xmm7, %xmm0
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
Expand Down Expand Up @@ -192,8 +192,7 @@ define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v4i32:
Expand Down Expand Up @@ -222,14 +221,12 @@ define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v8i32:
Expand Down Expand Up @@ -319,8 +316,7 @@ define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v16i8:
Expand Down Expand Up @@ -349,14 +345,12 @@ define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE2-NEXT: pcmpgtb %xmm2, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pcmpgtb %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v32i8:
Expand Down Expand Up @@ -507,16 +501,16 @@ define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm2, %xmm6
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
; SSE41-NEXT: movdqa %xmm4, %xmm6
; SSE41-NEXT: pxor %xmm5, %xmm6
; SSE41-NEXT: movdqa %xmm0, %xmm7
; SSE41-NEXT: pxor %xmm5, %xmm7
; SSE41-NEXT: movdqa %xmm7, %xmm0
; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
; SSE41-NEXT: pand %xmm6, %xmm0
; SSE41-NEXT: por %xmm7, %xmm0
; SSE41-NEXT: movdqa %xmm6, %xmm7
; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT: pand %xmm7, %xmm0
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
Expand Down Expand Up @@ -579,8 +573,7 @@ define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v4i32:
Expand Down Expand Up @@ -609,14 +602,12 @@ define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v8i32:
Expand Down Expand Up @@ -706,8 +697,7 @@ define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v16i8:
Expand Down Expand Up @@ -736,14 +726,12 @@ define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE2-NEXT: pcmpgtb %xmm2, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pcmpgtb %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v32i8:
Expand Down
102 changes: 50 additions & 52 deletions llvm/test/CodeGen/X86/vec_minmax_uint.ll
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,16 @@ define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm2, %xmm6
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
; SSE41-NEXT: movdqa %xmm4, %xmm6
; SSE41-NEXT: pxor %xmm5, %xmm6
; SSE41-NEXT: movdqa %xmm0, %xmm7
; SSE41-NEXT: pxor %xmm5, %xmm7
; SSE41-NEXT: movdqa %xmm7, %xmm0
; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
; SSE41-NEXT: pand %xmm6, %xmm0
; SSE41-NEXT: por %xmm7, %xmm0
; SSE41-NEXT: movdqa %xmm6, %xmm7
; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT: pand %xmm7, %xmm0
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
Expand Down Expand Up @@ -245,23 +245,22 @@ define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: max_gt_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: pxor %xmm5, %xmm6
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pxor %xmm5, %xmm4
; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm5, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm5
; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm5
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm5
; SSE2-NEXT: movdqa %xmm0, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm6
; SSE2-NEXT: por %xmm6, %xmm0
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm4
; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm4
; SSE2-NEXT: por %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v8i32:
Expand Down Expand Up @@ -537,16 +536,16 @@ define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm2, %xmm6
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
; SSE41-NEXT: movdqa %xmm4, %xmm6
; SSE41-NEXT: pxor %xmm5, %xmm6
; SSE41-NEXT: movdqa %xmm0, %xmm7
; SSE41-NEXT: pxor %xmm5, %xmm7
; SSE41-NEXT: movdqa %xmm7, %xmm0
; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
; SSE41-NEXT: pand %xmm6, %xmm0
; SSE41-NEXT: por %xmm7, %xmm0
; SSE41-NEXT: movdqa %xmm6, %xmm7
; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT: pand %xmm7, %xmm0
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
Expand Down Expand Up @@ -652,23 +651,22 @@ define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: max_ge_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: pxor %xmm5, %xmm6
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pxor %xmm5, %xmm4
; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm5, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm5
; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm5
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm5
; SSE2-NEXT: movdqa %xmm0, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm6
; SSE2-NEXT: por %xmm6, %xmm0
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm4
; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm4
; SSE2-NEXT: por %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v8i32:
Expand Down
49 changes: 24 additions & 25 deletions llvm/test/CodeGen/X86/vec_saddo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -817,11 +817,11 @@ define <4 x i32> @saddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE2-NEXT: pslld $8, %xmm2
; SSE2-NEXT: psrad $8, %xmm2
; SSE2-NEXT: paddd %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: pslld $8, %xmm0
; SSE2-NEXT: psrad $8, %xmm0
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pslld $8, %xmm1
; SSE2-NEXT: psrad $8, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: movw %ax, (%rdi)
Expand Down Expand Up @@ -852,11 +852,11 @@ define <4 x i32> @saddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSSE3-NEXT: pslld $8, %xmm2
; SSSE3-NEXT: psrad $8, %xmm2
; SSSE3-NEXT: paddd %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: pslld $8, %xmm0
; SSSE3-NEXT: psrad $8, %xmm0
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSSE3-NEXT: pslld $8, %xmm1
; SSSE3-NEXT: psrad $8, %xmm1
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: movd %xmm2, %eax
; SSSE3-NEXT: movw %ax, (%rdi)
Expand All @@ -881,25 +881,24 @@ define <4 x i32> @saddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
;
; SSE41-LABEL: saddo_v4i24:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pslld $8, %xmm1
; SSE41-NEXT: psrad $8, %xmm1
; SSE41-NEXT: pslld $8, %xmm2
; SSE41-NEXT: psrad $8, %xmm2
; SSE41-NEXT: paddd %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pslld $8, %xmm0
; SSE41-NEXT: psrad $8, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
; SSE41-NEXT: paddd %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pslld $8, %xmm2
; SSE41-NEXT: psrad $8, %xmm2
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pextrd $3, %xmm2, %eax
; SSE41-NEXT: pxor %xmm2, %xmm1
; SSE41-NEXT: pextrd $3, %xmm0, %eax
; SSE41-NEXT: movw %ax, 9(%rdi)
; SSE41-NEXT: pextrd $2, %xmm2, %ecx
; SSE41-NEXT: pextrd $2, %xmm0, %ecx
; SSE41-NEXT: movw %cx, 6(%rdi)
; SSE41-NEXT: pextrd $1, %xmm2, %edx
; SSE41-NEXT: pextrd $1, %xmm0, %edx
; SSE41-NEXT: movw %dx, 3(%rdi)
; SSE41-NEXT: movd %xmm2, %esi
; SSE41-NEXT: movd %xmm0, %esi
; SSE41-NEXT: movw %si, (%rdi)
; SSE41-NEXT: shrl $16, %eax
; SSE41-NEXT: movb %al, 11(%rdi)
Expand All @@ -909,6 +908,7 @@ define <4 x i32> @saddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE41-NEXT: movb %dl, 5(%rdi)
; SSE41-NEXT: shrl $16, %esi
; SSE41-NEXT: movb %sil, 2(%rdi)
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: saddo_v4i24:
Expand Down Expand Up @@ -989,11 +989,10 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; SSE-NEXT: pslld $31, %xmm1
; SSE-NEXT: movmskps %xmm1, %eax
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm1, %xmm0
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movb %al, (%rdi)
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: saddo_v4i1:
Expand Down
47 changes: 20 additions & 27 deletions llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: psrlw $11, %xmm1
; SSE-NEXT: paddw %xmm0, %xmm1
; SSE-NEXT: psraw $5, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: psraw $5, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_vec8x16:
Expand All @@ -32,9 +31,8 @@ define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: psrlw $11, %xmm1
; SSE-NEXT: paddw %xmm0, %xmm1
; SSE-NEXT: psraw $5, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: psraw $5, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_vec8x16_minsize:
Expand All @@ -55,9 +53,8 @@ define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: psrld $28, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: psrad $4, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: psrad $4, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_vec4x32:
Expand Down Expand Up @@ -104,15 +101,13 @@ define <8 x i32> @sdiv8x32(<8 x i32> %var) {
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: psrld $26, %xmm2
; SSE-NEXT: paddd %xmm0, %xmm2
; SSE-NEXT: psrad $6, %xmm2
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: psrad $31, %xmm3
; SSE-NEXT: psrld $26, %xmm3
; SSE-NEXT: paddd %xmm1, %xmm3
; SSE-NEXT: psrad $6, %xmm3
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: psrad $6, %xmm0
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: psrld $26, %xmm2
; SSE-NEXT: paddd %xmm2, %xmm1
; SSE-NEXT: psrad $6, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: sdiv8x32:
Expand Down Expand Up @@ -147,15 +142,13 @@ define <16 x i16> @sdiv16x16(<16 x i16> %var) {
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: psrlw $14, %xmm2
; SSE-NEXT: paddw %xmm0, %xmm2
; SSE-NEXT: psraw $2, %xmm2
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: psraw $15, %xmm3
; SSE-NEXT: psrlw $14, %xmm3
; SSE-NEXT: paddw %xmm1, %xmm3
; SSE-NEXT: psraw $2, %xmm3
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: paddw %xmm2, %xmm0
; SSE-NEXT: psraw $2, %xmm0
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: psrlw $14, %xmm2
; SSE-NEXT: paddw %xmm2, %xmm1
; SSE-NEXT: psraw $2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: sdiv16x16:
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/vec_shift6.ll
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,14 @@ define <4 x i32> @test4(<4 x i32> %a) {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pslld $1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test4:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pslld $1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: test4:
Expand Down
35 changes: 19 additions & 16 deletions llvm/test/CodeGen/X86/vec_smulo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,10 @@ define <2 x i32> @smulo_v2i32(<2 x i32> %a0, <2 x i32> %a1, <2 x i32>* %p2) noun
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: movq %xmm0, (%rdi)
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
Expand Down Expand Up @@ -246,9 +247,10 @@ define <3 x i32> @smulo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: pextrd $2, %xmm0, 8(%rdi)
; SSE41-NEXT: movq %xmm0, (%rdi)
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
Expand Down Expand Up @@ -378,9 +380,10 @@ define <4 x i32> @smulo_v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i32>* %p2) noun
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, (%rdi)
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: retq
;
Expand Down Expand Up @@ -3094,19 +3097,19 @@ define <4 x i32> @smulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE41-NEXT: pmulld %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pslld $8, %xmm3
; SSE41-NEXT: psrad $8, %xmm3
; SSE41-NEXT: pcmpeqd %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pslld $8, %xmm0
; SSE41-NEXT: psrad $8, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; SSE41-NEXT: pextrd $3, %xmm1, %eax
; SSE41-NEXT: pextrd $2, %xmm1, %ecx
; SSE41-NEXT: pextrd $1, %xmm1, %edx
; SSE41-NEXT: movd %xmm1, %esi
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: pxor %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: movw %ax, 9(%rdi)
; SSE41-NEXT: movw %cx, 6(%rdi)
Expand Down
49 changes: 24 additions & 25 deletions llvm/test/CodeGen/X86/vec_ssubo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -826,11 +826,11 @@ define <4 x i32> @ssubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE2-NEXT: pslld $8, %xmm2
; SSE2-NEXT: psrad $8, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: pslld $8, %xmm0
; SSE2-NEXT: psrad $8, %xmm0
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pslld $8, %xmm1
; SSE2-NEXT: psrad $8, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: movw %ax, (%rdi)
Expand Down Expand Up @@ -861,11 +861,11 @@ define <4 x i32> @ssubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSSE3-NEXT: pslld $8, %xmm2
; SSSE3-NEXT: psrad $8, %xmm2
; SSSE3-NEXT: psubd %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: pslld $8, %xmm0
; SSSE3-NEXT: psrad $8, %xmm0
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSSE3-NEXT: pslld $8, %xmm1
; SSSE3-NEXT: psrad $8, %xmm1
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: movd %xmm2, %eax
; SSSE3-NEXT: movw %ax, (%rdi)
Expand All @@ -890,25 +890,24 @@ define <4 x i32> @ssubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
;
; SSE41-LABEL: ssubo_v4i24:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pslld $8, %xmm1
; SSE41-NEXT: psrad $8, %xmm1
; SSE41-NEXT: pslld $8, %xmm2
; SSE41-NEXT: psrad $8, %xmm2
; SSE41-NEXT: psubd %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pslld $8, %xmm0
; SSE41-NEXT: psrad $8, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
; SSE41-NEXT: psubd %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pslld $8, %xmm2
; SSE41-NEXT: psrad $8, %xmm2
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pextrd $3, %xmm2, %eax
; SSE41-NEXT: pxor %xmm2, %xmm1
; SSE41-NEXT: pextrd $3, %xmm0, %eax
; SSE41-NEXT: movw %ax, 9(%rdi)
; SSE41-NEXT: pextrd $2, %xmm2, %ecx
; SSE41-NEXT: pextrd $2, %xmm0, %ecx
; SSE41-NEXT: movw %cx, 6(%rdi)
; SSE41-NEXT: pextrd $1, %xmm2, %edx
; SSE41-NEXT: pextrd $1, %xmm0, %edx
; SSE41-NEXT: movw %dx, 3(%rdi)
; SSE41-NEXT: movd %xmm2, %esi
; SSE41-NEXT: movd %xmm0, %esi
; SSE41-NEXT: movw %si, (%rdi)
; SSE41-NEXT: shrl $16, %eax
; SSE41-NEXT: movb %al, 11(%rdi)
Expand All @@ -918,6 +917,7 @@ define <4 x i32> @ssubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; SSE41-NEXT: movb %dl, 5(%rdi)
; SSE41-NEXT: shrl $16, %esi
; SSE41-NEXT: movb %sil, 2(%rdi)
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: ssubo_v4i24:
Expand Down Expand Up @@ -998,11 +998,10 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; SSE-NEXT: pslld $31, %xmm1
; SSE-NEXT: movmskps %xmm1, %eax
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm1, %xmm0
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movb %al, (%rdi)
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ssubo_v4i1:
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/CodeGen/X86/vec_umulo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2740,29 +2740,29 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
;
; SSE41-LABEL: umulo_v4i24:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; SSE41-NEXT: pand %xmm0, %xmm2
; SSE41-NEXT: pand %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pmuludq %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; SSE41-NEXT: pmuludq %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: pcmpeqd %xmm3, %xmm4
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm4, %xmm0
; SSE41-NEXT: pmulld %xmm2, %xmm1
; SSE41-NEXT: pcmpeqd %xmm3, %xmm2
; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
; SSE41-NEXT: pxor %xmm2, %xmm4
; SSE41-NEXT: pmulld %xmm0, %xmm1
; SSE41-NEXT: pextrd $3, %xmm1, %eax
; SSE41-NEXT: pextrd $2, %xmm1, %ecx
; SSE41-NEXT: pextrd $1, %xmm1, %edx
; SSE41-NEXT: movd %xmm1, %esi
; SSE41-NEXT: psrld $24, %xmm1
; SSE41-NEXT: pcmpgtd %xmm3, %xmm1
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrld $24, %xmm0
; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: movw %ax, 9(%rdi)
; SSE41-NEXT: movw %cx, 6(%rdi)
; SSE41-NEXT: movw %dx, 3(%rdi)
Expand Down
28 changes: 7 additions & 21 deletions llvm/test/CodeGen/X86/vector-bitreverse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
define i8 @test_bitreverse_i8(i8 %a) nounwind {
; SSE-LABEL: test_bitreverse_i8:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $edi killed $edi def $rdi
; SSE-NEXT: rolb $4, %dil
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: andb $51, %al
Expand All @@ -32,13 +31,11 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; SSE-NEXT: addb %al, %al
; SSE-NEXT: shrb %dil
; SSE-NEXT: andb $85, %dil
; SSE-NEXT: addl %edi, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: orb %dil, %al
; SSE-NEXT: retq
;
; AVX-LABEL: test_bitreverse_i8:
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $edi killed $edi def $rdi
; AVX-NEXT: rolb $4, %dil
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: andb $51, %al
Expand All @@ -51,8 +48,7 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; AVX-NEXT: addb %al, %al
; AVX-NEXT: shrb %dil
; AVX-NEXT: andb $85, %dil
; AVX-NEXT: addl %edi, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: orb %dil, %al
; AVX-NEXT: retq
;
; XOP-LABEL: test_bitreverse_i8:
Expand All @@ -65,7 +61,6 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
;
; GFNISSE-LABEL: test_bitreverse_i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: # kill: def $edi killed $edi def $rdi
; GFNISSE-NEXT: rolb $4, %dil
; GFNISSE-NEXT: movl %edi, %eax
; GFNISSE-NEXT: andb $51, %al
Expand All @@ -78,13 +73,11 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; GFNISSE-NEXT: addb %al, %al
; GFNISSE-NEXT: shrb %dil
; GFNISSE-NEXT: andb $85, %dil
; GFNISSE-NEXT: addl %edi, %eax
; GFNISSE-NEXT: # kill: def $al killed $al killed $eax
; GFNISSE-NEXT: orb %dil, %al
; GFNISSE-NEXT: retq
;
; GFNIAVX-LABEL: test_bitreverse_i8:
; GFNIAVX: # %bb.0:
; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi
; GFNIAVX-NEXT: rolb $4, %dil
; GFNIAVX-NEXT: movl %edi, %eax
; GFNIAVX-NEXT: andb $51, %al
Expand All @@ -97,13 +90,11 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; GFNIAVX-NEXT: addb %al, %al
; GFNIAVX-NEXT: shrb %dil
; GFNIAVX-NEXT: andb $85, %dil
; GFNIAVX-NEXT: addl %edi, %eax
; GFNIAVX-NEXT: # kill: def $al killed $al killed $eax
; GFNIAVX-NEXT: orb %dil, %al
; GFNIAVX-NEXT: retq
;
; GFNIAVX2-LABEL: test_bitreverse_i8:
; GFNIAVX2: # %bb.0:
; GFNIAVX2-NEXT: # kill: def $edi killed $edi def $rdi
; GFNIAVX2-NEXT: rolb $4, %dil
; GFNIAVX2-NEXT: movl %edi, %eax
; GFNIAVX2-NEXT: andb $51, %al
Expand All @@ -116,13 +107,11 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; GFNIAVX2-NEXT: addb %al, %al
; GFNIAVX2-NEXT: shrb %dil
; GFNIAVX2-NEXT: andb $85, %dil
; GFNIAVX2-NEXT: addl %edi, %eax
; GFNIAVX2-NEXT: # kill: def $al killed $al killed $eax
; GFNIAVX2-NEXT: orb %dil, %al
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512F-LABEL: test_bitreverse_i8:
; GFNIAVX512F: # %bb.0:
; GFNIAVX512F-NEXT: # kill: def $edi killed $edi def $rdi
; GFNIAVX512F-NEXT: rolb $4, %dil
; GFNIAVX512F-NEXT: movl %edi, %eax
; GFNIAVX512F-NEXT: andb $51, %al
Expand All @@ -135,13 +124,11 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; GFNIAVX512F-NEXT: addb %al, %al
; GFNIAVX512F-NEXT: shrb %dil
; GFNIAVX512F-NEXT: andb $85, %dil
; GFNIAVX512F-NEXT: addl %edi, %eax
; GFNIAVX512F-NEXT: # kill: def $al killed $al killed $eax
; GFNIAVX512F-NEXT: orb %dil, %al
; GFNIAVX512F-NEXT: retq
;
; GFNIAVX512BW-LABEL: test_bitreverse_i8:
; GFNIAVX512BW: # %bb.0:
; GFNIAVX512BW-NEXT: # kill: def $edi killed $edi def $rdi
; GFNIAVX512BW-NEXT: rolb $4, %dil
; GFNIAVX512BW-NEXT: movl %edi, %eax
; GFNIAVX512BW-NEXT: andb $51, %al
Expand All @@ -154,8 +141,7 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; GFNIAVX512BW-NEXT: addb %al, %al
; GFNIAVX512BW-NEXT: shrb %dil
; GFNIAVX512BW-NEXT: andb $85, %dil
; GFNIAVX512BW-NEXT: addl %edi, %eax
; GFNIAVX512BW-NEXT: # kill: def $al killed $al killed $eax
; GFNIAVX512BW-NEXT: orb %dil, %al
; GFNIAVX512BW-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %b
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/X86/vector-ext-logic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -348,21 +348,21 @@ define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_and:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
Expand All @@ -386,21 +386,21 @@ define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_or:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
Expand All @@ -424,21 +424,21 @@ define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_xor:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
Expand Down
231 changes: 114 additions & 117 deletions llvm/test/CodeGen/X86/vector-fshl-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE2-NEXT: psrlq %xmm4, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; SSE2-NEXT: psrlq %xmm4, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm5[0],xmm1[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm5 = xmm5[0],xmm1[1]
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: psllq %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psllq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSE2-NEXT: psllq %xmm2, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
; SSE2-NEXT: orpd %xmm1, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: orpd %xmm5, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: var_funnnel_v2i64:
Expand All @@ -55,14 +55,14 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE41-NEXT: psrlq %xmm4, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; SSE41-NEXT: psrlq %xmm4, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pand %xmm3, %xmm2
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psllq %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psllq %xmm2, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSE41-NEXT: psllq %xmm2, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: por %xmm5, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: var_funnnel_v2i64:
Expand Down Expand Up @@ -178,22 +178,22 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
;
; X86-SSE2-LABEL: var_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm3, %xmm4
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm5
; X86-SSE2-NEXT: pandn %xmm4, %xmm5
; X86-SSE2-NEXT: psrlq $1, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
; X86-SSE2-NEXT: psrlq %xmm4, %xmm5
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm4, %xmm1
; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm5[0],xmm1[1]
; X86-SSE2-NEXT: pand %xmm3, %xmm2
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: psllq %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm1, %xmm3
; X86-SSE2-NEXT: psrlq %xmm5, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm5, %xmm1
; X86-SSE2-NEXT: shufpd {{.*#+}} xmm3 = xmm3[0],xmm1[1]
; X86-SSE2-NEXT: pand %xmm4, %xmm2
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psllq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; X86-SSE2-NEXT: psllq %xmm2, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm1, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm3, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
ret <2 x i64> %res
Expand Down Expand Up @@ -254,14 +254,14 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; SSE41-NEXT: psrld %xmm4, %xmm6
; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,1,1,1,4,5,6,7]
; SSE41-NEXT: psrld %xmm4, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm6[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm6 = xmm6[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm6 = xmm6[0,1],xmm3[2,3],xmm6[4,5],xmm3[6,7]
; SSE41-NEXT: pand %xmm8, %xmm2
; SSE41-NEXT: pslld $23, %xmm2
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: cvttps2dq %xmm2, %xmm2
; SSE41-NEXT: pmulld %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: cvttps2dq %xmm2, %xmm1
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: var_funnnel_v4i32:
Expand Down Expand Up @@ -453,32 +453,31 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; SSE2-NEXT: por %xmm5, %xmm3
; SSE2-NEXT: paddw %xmm4, %xmm4
; SSE2-NEXT: psraw $15, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pandn %xmm3, %xmm5
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm1
; SSE2-NEXT: psrlw $1, %xmm3
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pslld $23, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
; SSE2-NEXT: paddd %xmm4, %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,2,2,3]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pslld $23, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
; SSE2-NEXT: paddd %xmm5, %xmm4
; SSE2-NEXT: cvttps2dq %xmm4, %xmm4
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pslld $23, %xmm2
; SSE2-NEXT: paddd %xmm4, %xmm2
; SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm6[0]
; SSE2-NEXT: pmullw %xmm0, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: paddd %xmm5, %xmm2
; SSE2-NEXT: cvttps2dq %xmm2, %xmm2
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSE2-NEXT: pmullw %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: var_funnnel_v8i16:
Expand Down Expand Up @@ -522,9 +521,8 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; SSE41-NEXT: paddd %xmm4, %xmm0
; SSE41-NEXT: cvttps2dq %xmm0, %xmm0
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: pmullw %xmm0, %xmm3
; SSE41-NEXT: por %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pmullw %xmm3, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: var_funnnel_v8i16:
Expand Down Expand Up @@ -699,32 +697,31 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; X86-SSE2-NEXT: por %xmm5, %xmm3
; X86-SSE2-NEXT: paddw %xmm4, %xmm4
; X86-SSE2-NEXT: psraw $15, %xmm4
; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
; X86-SSE2-NEXT: pandn %xmm3, %xmm5
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm1
; X86-SSE2-NEXT: psrlw $1, %xmm3
; X86-SSE2-NEXT: pand %xmm4, %xmm3
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT: pslld $23, %xmm1
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
; X86-SSE2-NEXT: paddd %xmm4, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT: pslld $23, %xmm4
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
; X86-SSE2-NEXT: paddd %xmm5, %xmm4
; X86-SSE2-NEXT: cvttps2dq %xmm4, %xmm4
; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,6,6,7]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; X86-SSE2-NEXT: pslld $23, %xmm2
; X86-SSE2-NEXT: paddd %xmm4, %xmm2
; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm6[0]
; X86-SSE2-NEXT: pmullw %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm5, %xmm1
; X86-SSE2-NEXT: por %xmm3, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: paddd %xmm5, %xmm2
; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm2
; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; X86-SSE2-NEXT: pmullw %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: por %xmm3, %xmm0
; X86-SSE2-NEXT: retl
%res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
ret <8 x i16> %res
Expand Down Expand Up @@ -1171,23 +1168,23 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,1,0,1]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
; X86-SSE2-NEXT: pandn %xmm4, %xmm5
; X86-SSE2-NEXT: psrlq $1, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
; X86-SSE2-NEXT: psrlq %xmm4, %xmm5
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm4, %xmm1
; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm5[0],xmm1[1]
; X86-SSE2-NEXT: pand %xmm3, %xmm2
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: psllq %xmm2, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; X86-SSE2-NEXT: psllq %xmm2, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: psrlq %xmm5, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm5, %xmm1
; X86-SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; X86-SSE2-NEXT: pand %xmm4, %xmm3
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psllq %xmm3, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X86-SSE2-NEXT: psllq %xmm3, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm2, %xmm0
; X86-SSE2-NEXT: retl
%splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %splat)
Expand Down Expand Up @@ -2066,25 +2063,25 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrlq $60, %xmm2
; SSE2-NEXT: psrlq $50, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psllq $4, %xmm2
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psllq $4, %xmm1
; SSE2-NEXT: psllq $14, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT: orpd %xmm1, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: orpd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_funnnel_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrlq $50, %xmm2
; SSE41-NEXT: psrlq $60, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psllq $14, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psllq $14, %xmm1
; SSE41-NEXT: psllq $4, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_funnnel_v2i64:
Expand Down Expand Up @@ -2164,23 +2161,23 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; X86-SSE2-LABEL: constant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = <4,u,14,u>
; X86-SSE2-NEXT: movdqa %xmm3, %xmm4
; X86-SSE2-NEXT: pandn %xmm2, %xmm4
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <4,u,14,u>
; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
; X86-SSE2-NEXT: pandn %xmm3, %xmm5
; X86-SSE2-NEXT: psrlq $1, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
; X86-SSE2-NEXT: psrlq %xmm4, %xmm5
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm4, %xmm1
; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm5[0],xmm1[1]
; X86-SSE2-NEXT: pand %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psllq %xmm3, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: psrlq %xmm5, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm5, %xmm1
; X86-SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; X86-SSE2-NEXT: pand %xmm3, %xmm4
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psllq %xmm4, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3]
; X86-SSE2-NEXT: psllq %xmm3, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm1, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm2, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> <i64 4, i64 14>)
ret <2 x i64> %res
Expand Down Expand Up @@ -2220,10 +2217,10 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrld $26, %xmm2
; SSE41-NEXT: psrld $28, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_funnnel_v4i32:
Expand Down
Loading