202 changes: 101 additions & 101 deletions llvm/test/CodeGen/X86/pr38539.ll
@@ -22,7 +22,7 @@ define void @f() nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $160, %esp
; X86-NEXT: subl $176, %esp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -47,54 +47,55 @@ define void @f() nounwind {
; X86-NEXT: testl %edi, %edi
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %BB_udiv-special-cases
; X86-NEXT: bsrl %esi, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: addl $32, %eax
; X86-NEXT: bsrl %esi, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
; X86-NEXT: jmp .LBB0_3
; X86-NEXT: .LBB0_1:
; X86-NEXT: bsrl %edi, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: bsrl %edi, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: .LBB0_3: # %BB_udiv-special-cases
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testl %edx, %edx
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: jne .LBB0_4
; X86-NEXT: # %bb.5: # %BB_udiv-special-cases
; X86-NEXT: addl $64, %eax
; X86-NEXT: addl $64, %ecx
; X86-NEXT: jmp .LBB0_6
; X86-NEXT: .LBB0_4:
; X86-NEXT: bsrl %edx, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: addl $32, %eax
; X86-NEXT: bsrl %edx, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
; X86-NEXT: .LBB0_6: # %BB_udiv-special-cases
; X86-NEXT: subl $62, %eax
; X86-NEXT: subl $62, %ecx
; X86-NEXT: movl $0, %ebx
; X86-NEXT: sbbl %ebx, %ebx
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: addl $-66, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: addl $-66, %ecx
; X86-NEXT: adcl $-1, %ebx
; X86-NEXT: adcl $3, %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: movb $1, %cl
; X86-NEXT: testb %cl, %cl
; X86-NEXT: adcl $3, %eax
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movb $1, %al
; X86-NEXT: testb %al, %al
; X86-NEXT: jne .LBB0_11
; X86-NEXT: # %bb.7: # %BB_udiv-special-cases
; X86-NEXT: andl $3, %esi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $65, %ecx
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %ebx, %ecx
; X86-NEXT: andl $3, %edi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: xorl $65, %eax
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %edi, %eax
; X86-NEXT: orl %ebx, %eax
; X86-NEXT: je .LBB0_11
; X86-NEXT: # %bb.8: # %udiv-bb1
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $1, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: andl $3, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: adcl $0, %esi
; X86-NEXT: andl $3, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movb $65, %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, %ch
@@ -111,31 +112,29 @@ define void @f() nounwind {
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 120(%esp,%eax), %edi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 136(%esp,%eax), %edx
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shll %cl, %edi
; X86-NEXT: shll %cl, %edx
; X86-NEXT: notb %cl
; X86-NEXT: movl 112(%esp,%eax), %esi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 116(%esp,%eax), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: movl 128(%esp,%eax), %edi
; X86-NEXT: movl 132(%esp,%eax), %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll %cl, %esi
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll %cl, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %ebx, %ecx
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: orl %ebx, %ecx
; X86-NEXT: je .LBB0_11
; X86-NEXT: # %bb.9: # %udiv-preheader
; X86-NEXT: orl %eax, %edi
; X86-NEXT: andl $3, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %eax, %edx
; X86-NEXT: andl $3, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
@@ -150,20 +149,20 @@ define void @f() nounwind {
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $15, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movl 64(%esp,%eax), %edi
; X86-NEXT: movl 68(%esp,%eax), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movzbl %al, %esi
; X86-NEXT: movl 80(%esp,%esi), %edx
; X86-NEXT: movl 84(%esp,%esi), %eax
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shrl %cl, %esi
; X86-NEXT: shrl %cl, %edi
; X86-NEXT: notb %cl
; X86-NEXT: movl 72(%esp,%eax), %ebx
; X86-NEXT: addl %ebx, %ebx
; X86-NEXT: shll %cl, %ebx
; X86-NEXT: orl %esi, %ebx
; X86-NEXT: movl 88(%esp,%esi), %esi
; X86-NEXT: addl %esi, %esi
; X86-NEXT: shll %cl, %esi
; X86-NEXT: orl %edi, %esi
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shrdl %cl, %edx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shrdl %cl, %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl $-1, %eax
@@ -180,62 +179,63 @@ define void @f() nounwind {
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB0_10: # %udiv-do-while
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: shldl $1, %ebx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: shldl $1, %ebx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, %edx
; X86-NEXT: andl $2, %edx
; X86-NEXT: shrl %edx
; X86-NEXT: leal (%edx,%ebx,2), %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl $1, %esi, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $1, %edx, %esi
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: andl $2, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%edx,2), %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: shldl $1, %edi, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: orl %esi, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shldl $1, %eax, %edx
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl $1, %eax, %edi
; X86-NEXT: orl %esi, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %eax, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl $3, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: sbbl %ecx, %esi
; X86-NEXT: shll $30, %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: sarl $30, %edx
; X86-NEXT: sarl $31, %esi
; X86-NEXT: shrdl $1, %esi, %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %esi, %eax
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: subl %edx, %ebx
; X86-NEXT: andl $3, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %esi, %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: sbbl %eax, %ecx
; X86-NEXT: cmpl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: sbbl %ecx, %ebx
; X86-NEXT: shll $30, %ebx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: sarl $30, %eax
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: shrdl $1, %ebx, %eax
; X86-NEXT: movl %eax, %edi
; X86-NEXT: andl $1, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: subl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %ebx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: sbbl %edi, %ecx
; X86-NEXT: andl $3, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: adcl $3, %esi
; X86-NEXT: andl $3, %esi
; X86-NEXT: adcl $-1, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: adcl $3, %edi
; X86-NEXT: andl $3, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %esi, %eax
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %edx, %eax
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %edi, %eax
; X86-NEXT: orl %ebx, %eax
; X86-NEXT: jne .LBB0_10
; X86-NEXT: .LBB0_11: # %udiv-end
; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
31 changes: 16 additions & 15 deletions llvm/test/CodeGen/X86/pr62286.ll
@@ -8,20 +8,21 @@ define i64 @PR62286(i32 %a) {
; SSE-LABEL: PR62286:
; SSE: # %bb.0:
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,1,0]
; SSE-NEXT: paddd %xmm0, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,0]
; SSE-NEXT: paddd %xmm1, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: pcmpgtd %xmm0, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: paddq %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: pcmpgtd %xmm1, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pcmpgtd %xmm0, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: paddq %xmm1, %xmm0
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: paddq %xmm0, %xmm1
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: PR62286:
@@ -46,10 +47,10 @@ define i64 @PR62286(i32 %a) {
; AVX2-LABEL: PR62286:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm1
; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -14,6 +14,7 @@ define i256 @test1(i256 %a) nounwind {
; ILP: # %bb.0:
; ILP-NEXT: movq %rdi, %rax
; ILP-NEXT: leal (%rsi,%rsi), %ecx
; ILP-NEXT: addb $3, %cl
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
@@ -22,7 +23,6 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: addb $3, %cl
; ILP-NEXT: movl %ecx, %edx
; ILP-NEXT: shrb $3, %dl
; ILP-NEXT: andb $7, %cl
250 changes: 125 additions & 125 deletions llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -563,227 +563,227 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: subq $120, %rsp
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pxor %xmm3, %xmm3
; X64-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
; X64-NEXT: psllq $32, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
; X64-NEXT: psrad $31, %xmm2
; X64-NEXT: psrlq $31, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shldq $31, %rbp, %r14
; X64-NEXT: movq %rbp, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pcmpgtd %xmm1, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r12
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: subq $1, %rbp
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shrq $63, %rbp
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rdx, %r13
; X64-NEXT: movq %r12, %rax
; X64-NEXT: cmpq %rdx, %rbp
; X64-NEXT: movq %r14, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: cmovgeq %rdx, %r13
; X64-NEXT: cmovgeq %rcx, %r12
; X64-NEXT: cmovgeq %rcx, %r14
; X64-NEXT: cmovgeq %rdx, %rbp
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %r13, %rcx
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: movq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: movq $-1, %rax
; X64-NEXT: sbbq %r12, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: sbbq %r14, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shldq $31, %rbp, %r14
; X64-NEXT: movq %rbp, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r12
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: subq $1, %rbp
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shrq $63, %rbp
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %r12, %rax
; X64-NEXT: cmpq %rcx, %rbp
; X64-NEXT: movq %r14, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovgeq %rax, %r12
; X64-NEXT: cmovgeq %rax, %r14
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %r13, %rcx
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: movq $-1, %rax
; X64-NEXT: sbbq %r12, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: sbbq %r14, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: psrlq $1, %xmm1
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X64-NEXT: pshufd $212, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[0,1,1,3]
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; X64-NEXT: psrad $31, %xmm1
; X64-NEXT: psrlq $31, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shldq $31, %rbp, %r14
; X64-NEXT: movq %rbp, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtd %xmm0, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r12
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: subq $1, %rbp
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shrq $63, %rbp
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %r12, %rax
; X64-NEXT: cmpq %rcx, %rbp
; X64-NEXT: movq %r14, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovgeq %rax, %r12
; X64-NEXT: cmovgeq %rax, %r14
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %r13, %rcx
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: movq $-1, %rax
; X64-NEXT: sbbq %r12, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: sbbq %r14, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shldq $31, %rbp, %r14
; X64-NEXT: movq %rbp, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r12
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: subq $1, %rbp
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shrq $63, %rbp
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %r12, %rax
; X64-NEXT: cmpq %rcx, %rbp
; X64-NEXT: movq %r14, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovgeq %rax, %r12
; X64-NEXT: cmovgeq %rax, %r14
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %r13, %rax
; X64-NEXT: sbbq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: cmovgeq %rax, %r13
; X64-NEXT: movq %r13, %xmm1
; X64-NEXT: cmpq %rbp, %rax
; X64-NEXT: sbbq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: cmovgeq %rax, %rbp
; X64-NEXT: movq %rbp, %xmm1
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: psrlq $1, %xmm0
78 changes: 34 additions & 44 deletions llvm/test/CodeGen/X86/setcc-non-simple-type.ll
@@ -46,52 +46,47 @@ define void @failing(ptr %0, ptr %1) nounwind {
; CHECK-NEXT: movq 24(%rsi), %rcx
; CHECK-NEXT: movq 32(%rsi), %rdx
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1]
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2,2]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %vector.ph
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-NEXT: xorpd %xmm3, %xmm3
; CHECK-NEXT: movq $-1024, %rsi # imm = 0xFC00
; CHECK-NEXT: movq $-1024, %rdi # imm = 0xFC00
; CHECK-NEXT: movdqa %xmm0, %xmm4
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # %vector.body
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: movdqu 1024(%rdx,%rsi), %xmm5
; CHECK-NEXT: movdqu 1040(%rdx,%rsi), %xmm6
; CHECK-NEXT: movq %xmm5, %rdi
; CHECK-NEXT: movq %xmm6, %r8
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; CHECK-NEXT: movq %xmm5, %r9
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm6[2,3,2,3]
; CHECK-NEXT: movq %xmm5, %r10
; CHECK-NEXT: negq %r8
; CHECK-NEXT: cmpq 1024(%rdx,%rdi), %rsi
; CHECK-NEXT: movq %rcx, %r8
; CHECK-NEXT: sbbq %r10, %r8
; CHECK-NEXT: sbbq 1032(%rdx,%rdi), %r8
; CHECK-NEXT: setge %r8b
; CHECK-NEXT: movzbl %r8b, %r8d
; CHECK-NEXT: andl $1, %r8d
; CHECK-NEXT: negq %r8
; CHECK-NEXT: movq %r8, %xmm5
; CHECK-NEXT: negq %rdi
; CHECK-NEXT: movq %rcx, %rdi
; CHECK-NEXT: sbbq %r9, %rdi
; CHECK-NEXT: setge %dil
; CHECK-NEXT: movzbl %dil, %edi
; CHECK-NEXT: negq %rdi
; CHECK-NEXT: movq %rdi, %xmm6
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0]
; CHECK-NEXT: movdqa %xmm1, %xmm5
; CHECK-NEXT: psllq %xmm4, %xmm5
; CHECK-NEXT: cmpq 1040(%rdx,%rdi), %rsi
; CHECK-NEXT: movq %rcx, %r8
; CHECK-NEXT: sbbq 1048(%rdx,%rdi), %r8
; CHECK-NEXT: setge %r8b
; CHECK-NEXT: movzbl %r8b, %r8d
; CHECK-NEXT: andl $1, %r8d
; CHECK-NEXT: negq %r8
; CHECK-NEXT: movq %r8, %xmm6
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
; CHECK-NEXT: movdqa %xmm1, %xmm6
; CHECK-NEXT: psllq %xmm4, %xmm6
; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,2,3]
; CHECK-NEXT: movdqa %xmm1, %xmm8
; CHECK-NEXT: psllq %xmm7, %xmm8
; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm5[0],xmm8[1]
; CHECK-NEXT: andpd %xmm6, %xmm8
; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm6[0],xmm8[1]
; CHECK-NEXT: andpd %xmm5, %xmm8
; CHECK-NEXT: orpd %xmm8, %xmm3
; CHECK-NEXT: paddq %xmm2, %xmm4
; CHECK-NEXT: addq $32, %rsi
; CHECK-NEXT: addq $32, %rdi
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.3: # %middle.block
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
@@ -106,47 +101,42 @@ define void @failing(ptr %0, ptr %1) nounwind {
; CHECK-AVX2-NEXT: movq 24(%rsi), %rcx
; CHECK-AVX2-NEXT: movq 32(%rsi), %rdx
; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm0 = [0,1]
; CHECK-AVX2-NEXT: xorl %esi, %esi
; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,1]
; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [2,2]
; CHECK-AVX2-NEXT: .p2align 4, 0x90
; CHECK-AVX2-NEXT: .LBB0_1: # %vector.ph
; CHECK-AVX2-NEXT: # =>This Loop Header: Depth=1
; CHECK-AVX2-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-AVX2-NEXT: movq $-1024, %rsi # imm = 0xFC00
; CHECK-AVX2-NEXT: movq $-1024, %rdi # imm = 0xFC00
; CHECK-AVX2-NEXT: vmovdqa %xmm0, %xmm4
; CHECK-AVX2-NEXT: .p2align 4, 0x90
; CHECK-AVX2-NEXT: .LBB0_2: # %vector.body
; CHECK-AVX2-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-AVX2-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-AVX2-NEXT: vmovdqu 1024(%rdx,%rsi), %xmm5
; CHECK-AVX2-NEXT: vmovdqu 1040(%rdx,%rsi), %xmm6
; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm7 = xmm5[0],xmm6[0]
; CHECK-AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; CHECK-AVX2-NEXT: vmovq %xmm5, %rdi
; CHECK-AVX2-NEXT: vpextrq $1, %xmm5, %r8
; CHECK-AVX2-NEXT: vmovq %xmm7, %r9
; CHECK-AVX2-NEXT: vpextrq $1, %xmm7, %r10
; CHECK-AVX2-NEXT: negq %r10
; CHECK-AVX2-NEXT: movq %rcx, %r10
; CHECK-AVX2-NEXT: sbbq %r8, %r10
; CHECK-AVX2-NEXT: cmpq 1024(%rdx,%rdi), %rsi
; CHECK-AVX2-NEXT: movq %rcx, %r8
; CHECK-AVX2-NEXT: sbbq 1032(%rdx,%rdi), %r8
; CHECK-AVX2-NEXT: setge %r8b
; CHECK-AVX2-NEXT: movzbl %r8b, %r8d
; CHECK-AVX2-NEXT: andl $1, %r8d
; CHECK-AVX2-NEXT: negq %r8
; CHECK-AVX2-NEXT: vmovq %r8, %xmm5
; CHECK-AVX2-NEXT: negq %r9
; CHECK-AVX2-NEXT: cmpq 1040(%rdx,%rdi), %rsi
; CHECK-AVX2-NEXT: movq %rcx, %r8
; CHECK-AVX2-NEXT: sbbq %rdi, %r8
; CHECK-AVX2-NEXT: setge %dil
; CHECK-AVX2-NEXT: movzbl %dil, %edi
; CHECK-AVX2-NEXT: negq %rdi
; CHECK-AVX2-NEXT: vmovq %rdi, %xmm6
; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm6[0],xmm5[0]
; CHECK-AVX2-NEXT: sbbq 1048(%rdx,%rdi), %r8
; CHECK-AVX2-NEXT: setge %r8b
; CHECK-AVX2-NEXT: movzbl %r8b, %r8d
; CHECK-AVX2-NEXT: andl $1, %r8d
; CHECK-AVX2-NEXT: negq %r8
; CHECK-AVX2-NEXT: vmovq %r8, %xmm6
; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
; CHECK-AVX2-NEXT: vpsllvq %xmm4, %xmm1, %xmm6
; CHECK-AVX2-NEXT: vpand %xmm6, %xmm5, %xmm5
; CHECK-AVX2-NEXT: vpor %xmm3, %xmm5, %xmm3
; CHECK-AVX2-NEXT: vpaddq %xmm2, %xmm4, %xmm4
; CHECK-AVX2-NEXT: addq $32, %rsi
; CHECK-AVX2-NEXT: addq $32, %rdi
; CHECK-AVX2-NEXT: jne .LBB0_2
; CHECK-AVX2-NEXT: # %bb.3: # %middle.block
; CHECK-AVX2-NEXT: # in Loop: Header=BB0_1 Depth=1
14 changes: 9 additions & 5 deletions llvm/test/CodeGen/X86/vec_saddo.ll
@@ -1045,12 +1045,16 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: saddo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k2
; AVX512-NEXT: kandw %k1, %k0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kshiftlw $12, %k2, %k0
; AVX512-NEXT: kshiftrw $12, %k0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: retq
14 changes: 9 additions & 5 deletions llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -1062,12 +1062,16 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: ssubo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: vptestnmd %xmm0, %xmm0, %k1 {%k1}
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kshiftlw $12, %k0, %k0
; AVX512-NEXT: kshiftrw $12, %k0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: retq
14 changes: 9 additions & 5 deletions llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -1098,12 +1098,16 @@ define <4 x i32> @uaddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: uaddo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k2
; AVX512-NEXT: kandw %k1, %k0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kshiftlw $12, %k2, %k0
; AVX512-NEXT: kshiftrw $12, %k0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: retq
14 changes: 9 additions & 5 deletions llvm/test/CodeGen/X86/vec_usubo.ll
@@ -1145,12 +1145,16 @@ define <4 x i32> @usubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: usubo_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k0
; AVX512-NEXT: vptestnmd %xmm0, %xmm0, %k1 {%k1}
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: kshiftlw $12, %k0, %k0
; AVX512-NEXT: kshiftrw $12, %k0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, (%rdi)
; AVX512-NEXT: retq
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/vector-bo-select.ll
@@ -3137,11 +3137,11 @@ define <8 x i64> @mul_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef
; AVX512-LABEL: mul_v8i64_cast_cond:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw %edi, %k1
; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm2
; AVX512-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
; AVX512-NEXT: vpsrlq $32, %zmm1, %zmm3
; AVX512-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
; AVX512-NEXT: vpaddq %zmm2, %zmm3, %zmm2
; AVX512-NEXT: vpsrlq $32, %zmm1, %zmm2
; AVX512-NEXT: vpmuludq %zmm2, %zmm0, %zmm2
; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512-NEXT: vpmuludq %zmm1, %zmm3, %zmm3
; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512-NEXT: vpsllq $32, %zmm2, %zmm2
; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm1
; AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm0 {%k1}
86 changes: 43 additions & 43 deletions llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -58,12 +58,12 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; SSE41-NEXT: psrlq %xmm4, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pandn %xmm3, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSE41-NEXT: paddq %xmm0, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psllq %xmm2, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psllq %xmm1, %xmm3
; SSE41-NEXT: psllq %xmm2, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT: por %xmm5, %xmm0
; SSE41-NEXT: retq
;
@@ -76,11 +76,11 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -158,13 +158,13 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
; XOPAVX1-LABEL: var_funnnel_v2i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshlq %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlq %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshlq %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
@@ -366,13 +366,13 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
; XOPAVX1-LABEL: var_funnnel_v4i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshld %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
@@ -646,26 +646,26 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; XOPAVX1-LABEL: var_funnnel_v8i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubw %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshlw %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlw %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshlw %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshlw %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_funnnel_v8i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX2-NEXT: vpsubw %xmm4, %xmm5, %xmm4
; XOPAVX2-NEXT: vpshlw %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpaddw %xmm0, %xmm0, %xmm0
; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
@@ -995,26 +995,26 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
; XOPAVX1-LABEL: var_funnnel_v16i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubb %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshlb %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlb %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: var_funnnel_v16i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastb {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX2-NEXT: vpsubb %xmm4, %xmm5, %xmm4
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; XOPAVX2-NEXT: vpshlb %xmm4, %xmm0, %xmm0
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -486,22 +486,22 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; XOPAVX2-LABEL: var_funnnel_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5
; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm6
; XOPAVX2-NEXT: vpshlw %xmm5, %xmm6, %xmm5
; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; XOPAVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6
; XOPAVX2-NEXT: vpsubw %xmm5, %xmm6, %xmm5
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm7
; XOPAVX2-NEXT: vpshlw %xmm5, %xmm7, %xmm5
; XOPAVX2-NEXT: vpsubw %xmm4, %xmm6, %xmm4
; XOPAVX2-NEXT: vpshlw %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1
; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; XOPAVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOPAVX2-NEXT: vpsubw %xmm3, %xmm4, %xmm3
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
; XOPAVX2-NEXT: vpshlw %xmm3, %xmm5, %xmm3
; XOPAVX2-NEXT: vpsubw %xmm2, %xmm4, %xmm2
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
; XOPAVX2-NEXT: vpshlw %xmm3, %xmm4, %xmm3
; XOPAVX2-NEXT: vpshlw %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
%res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt)
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/vector-fshr-sub128.ll
@@ -185,13 +185,13 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
; XOPAVX1-LABEL: var_funnnel_v2i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4
; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT: vpshld %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -927,9 +927,9 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: constant_shift_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: paddq %xmm0, %xmm1
; SSE2-NEXT: psllq $7, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: psllq $7, %xmm1
; SSE2-NEXT: paddq %xmm0, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_shift_v2i64:
@@ -975,9 +975,9 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; X86-SSE-LABEL: constant_shift_v2i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
; X86-SSE-NEXT: paddq %xmm0, %xmm1
; X86-SSE-NEXT: psllq $7, %xmm0
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT: psllq $7, %xmm1
; X86-SSE-NEXT: paddq %xmm0, %xmm0
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X86-SSE-NEXT: retl
%shift = shl <2 x i64> %a, <i64 1, i64 7>
ret <2 x i64> %shift