@@ -597,114 +597,112 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: paddq %xmm0, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %r15, %r12
; X64-NEXT: shldq $31, %rbx, %r12
; X64-NEXT: movq %rbx, %rbp
; X64-NEXT: sarq $63, %rbp
; X64-NEXT: shldq $31, %rbx, %rbp
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pcmpgtd %xmm1, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shlq $31, %rbx
; X64-NEXT: movq %rbx, %rdi
; X64-NEXT: movq %r12, %rsi
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %rbp, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %rbp
; X64-NEXT: movq %rbx, %rdi
; X64-NEXT: movq %r12, %rsi
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %rbp, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shlq $31, %r15
; X64-NEXT: shrq $63, %r15
; X64-NEXT: xorl %r14d, %r15d
; X64-NEXT: testb %r15b, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rdx, %r13
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: cmovbq %r13, %rax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: testq %rbp, %rbp
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovnsq %rdx, %r13
; X64-NEXT: cmoveq %rax, %r13
; X64-NEXT: cmovnsq %rcx, %rbp
; X64-NEXT: cmovnsq %rcx, %r14
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: cmovaq %r13, %rax
; X64-NEXT: testq %rbp, %rbp
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovsq %rcx, %r13
; X64-NEXT: cmpq $-1, %rbp
; X64-NEXT: cmpq $-1, %r14
; X64-NEXT: cmoveq %rax, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,0,1]
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: shldq $31, %rbp, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,0,1]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shlq $31, %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: sarq $63, %rbp
; X64-NEXT: movq %rbx, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __divti3
; X64-NEXT: movq %rax, %r12
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r12
; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __modti3
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shlq $31, %rbx
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r14d, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: cmovbq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovnsq %rcx, %r12
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovnsq %rax, %r15
; X64-NEXT: cmovnsq %rax, %r14
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: cmovaq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovsq %rcx, %r12
; X64-NEXT: cmpq $-1, %r15
; X64-NEXT: cmpq $-1, %r14
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movq %r12, %xmm0
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
@@ -718,11 +716,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: paddq %xmm1, %xmm1
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: movq %r14, %r13
; X64-NEXT: shldq $31, %rbp, %r13
; X64-NEXT: movq %xmm1, %rbx
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: sarq $63, %r12
; X64-NEXT: shldq $31, %rbx, %r12
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; X64-NEXT: # xmm1 = mem[2,3,0,1]
; X64-NEXT: pxor %xmm0, %xmm0
@@ -731,104 +728,103 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: shlq $31, %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: sarq $63, %rbp
; X64-NEXT: movq %rbx, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r12, %rsi
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __divti3
; X64-NEXT: movq %rax, %r12
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r12
; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r12, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __modti3
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shlq $31, %r14
; X64-NEXT: shrq $63, %r14
; X64-NEXT: xorl %ebx, %r14d
; X64-NEXT: testb %r14b, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: cmovbq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: cmovnsq %rcx, %r12
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: cmovbq %r13, %rax
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovnsq %rcx, %r13
; X64-NEXT: cmoveq %rax, %r13
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovnsq %rax, %r15
; X64-NEXT: cmovnsq %rax, %r14
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: cmovaq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: cmovsq %rcx, %r12
; X64-NEXT: cmpq $-1, %r15
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movq %r12, %xmm0
; X64-NEXT: cmovaq %r13, %rax
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovsq %rcx, %r13
; X64-NEXT: cmpq $-1, %r14
; X64-NEXT: cmoveq %rax, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,0,1]
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: shldq $31, %rbp, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,0,1]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shlq $31, %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: sarq $63, %rbp
; X64-NEXT: movq %rbx, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __divti3
; X64-NEXT: movq %rax, %r12
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %r12
; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r14, %rcx
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: callq __modti3
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shlq $31, %rbx
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r14d, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NEXT: cmovbq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovnsq %rcx, %r12
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovnsq %rax, %r15
; X64-NEXT: cmovnsq %rax, %r14
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rcx, %r12
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: cmovaq %r12, %rax
; X64-NEXT: testq %r15, %r15
; X64-NEXT: testq %r14, %r14
; X64-NEXT: cmovsq %rcx, %r12
; X64-NEXT: cmpq $-1, %r15
; X64-NEXT: cmpq $-1, %r14
; X64-NEXT: cmoveq %rax, %r12
; X64-NEXT: movq %r12, %xmm0
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload