diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index b1edbf7d179db8..e57d0ffbc9409c 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -8709,6 +8709,10 @@ bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst, // 3. Other forms of the same operation (intrinsics and other variants) bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { switch (Inst.getOpcode()) { + case X86::ADD8rr: + case X86::ADD16rr: + case X86::ADD32rr: + case X86::ADD64rr: case X86::AND8rr: case X86::AND16rr: case X86::AND32rr: diff --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll index d2cc1b3599cbc4..5c78092e9f2c46 100644 --- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -37,190 +37,190 @@ define fastcc i64 @foo() nounwind { ; CHECK-NEXT: addq %rbx, %r11 ; CHECK-NEXT: leaq (%rsi,%rdx), %rbx ; CHECK-NEXT: addq %rdi, %rbx +; CHECK-NEXT: addq %rdi, %r11 ; CHECK-NEXT: addq %rbx, %r11 ; CHECK-NEXT: addq %rax, %rax ; CHECK-NEXT: addq %r10, %rax -; CHECK-NEXT: movq X(%rip), %rbx -; CHECK-NEXT: addq %rdi, %r11 ; CHECK-NEXT: addq %r11, %r8 ; CHECK-NEXT: addq %r10, %rax ; CHECK-NEXT: addq %r11, %rax -; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%rdi,%rsi), %r11 +; CHECK-NEXT: movq X(%rip), %r11 +; CHECK-NEXT: bswapq %r11 +; CHECK-NEXT: addq %rdx, %r11 +; CHECK-NEXT: leaq (%rdi,%rsi), %rdx +; CHECK-NEXT: addq %r8, %rdx ; CHECK-NEXT: addq %r8, %r11 -; CHECK-NEXT: addq %rdx, %rbx -; CHECK-NEXT: addq %r11, %rbx +; CHECK-NEXT: addq %rdx, %r11 ; CHECK-NEXT: leaq (%r10,%rcx), %rdx ; CHECK-NEXT: addq %rdx, %rdx ; CHECK-NEXT: addq %rax, %rdx -; CHECK-NEXT: movq X(%rip), %r11 -; CHECK-NEXT: addq %r8, %rbx -; CHECK-NEXT: addq %rbx, %r9 +; CHECK-NEXT: addq %r11, %r9 ; CHECK-NEXT: addq %rax, %rdx -; CHECK-NEXT: addq %rbx, %rdx +; CHECK-NEXT: addq %r11, %rdx +; CHECK-NEXT: movq X(%rip), %r11 ; CHECK-NEXT: bswapq %r11 -; CHECK-NEXT: leaq (%r8,%rdi), %rbx -; CHECK-NEXT: addq %r9, %rbx ; CHECK-NEXT: addq %rsi, %r11 -; CHECK-NEXT: addq %rbx, %r11 +; CHECK-NEXT: leaq (%r8,%rdi), %rsi +; CHECK-NEXT: addq %r9, %rsi +; CHECK-NEXT: addq %r9, %r11 +; CHECK-NEXT: addq %rsi, %r11 ; CHECK-NEXT: leaq (%rax,%r10), %rsi ; CHECK-NEXT: addq %rsi, %rsi ; CHECK-NEXT: addq %rdx, %rsi -; CHECK-NEXT: movq X(%rip), %rbx -; CHECK-NEXT: addq %r9, %r11 ; CHECK-NEXT: addq %r11, %rcx ; CHECK-NEXT: addq %rdx, %rsi ; CHECK-NEXT: addq %r11, %rsi -; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%r9,%r8), %r11 +; CHECK-NEXT: movq X(%rip), %r11 +; CHECK-NEXT: bswapq %r11 +; CHECK-NEXT: addq %rdi, %r11 +; CHECK-NEXT: leaq (%r9,%r8), %rdi +; CHECK-NEXT: addq %rcx, %rdi ; CHECK-NEXT: addq %rcx, %r11 -; CHECK-NEXT: addq %rdi, %rbx -; CHECK-NEXT: addq %r11, %rbx +; CHECK-NEXT: addq %rdi, %r11 ; CHECK-NEXT: leaq (%rdx,%rax), %rdi ; CHECK-NEXT: addq %rdi, %rdi ; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: movq X(%rip), %r11 -; CHECK-NEXT: addq %rcx, %rbx -; CHECK-NEXT: addq %rbx, %r10 +; CHECK-NEXT: addq %r11, %r10 ; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: addq %rbx, %rdi +; CHECK-NEXT: addq %r11, %rdi +; CHECK-NEXT: movq X(%rip), %r11 ; CHECK-NEXT: bswapq %r11 -; CHECK-NEXT: leaq (%rcx,%r9), %rbx -; CHECK-NEXT: addq %r10, %rbx ; CHECK-NEXT: addq %r8, %r11 -; CHECK-NEXT: addq %rbx, %r11 +; CHECK-NEXT: leaq (%rcx,%r9), %r8 +; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: addq %r10, %r11 +; CHECK-NEXT: addq %r8, %r11 ; CHECK-NEXT: leaq (%rsi,%rdx), %r8 ; CHECK-NEXT: addq %r8, %r8 ; CHECK-NEXT: addq %rdi, %r8 -; CHECK-NEXT: movq X(%rip), %rbx -; CHECK-NEXT: addq %r10, %r11 ; CHECK-NEXT: addq %r11, %rax ; CHECK-NEXT: addq %rdi, %r8 ; CHECK-NEXT: addq %r11, %r8 -; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%r10,%rcx), %r11 +; CHECK-NEXT: movq X(%rip), %r11 +; CHECK-NEXT: bswapq %r11 +; CHECK-NEXT: addq %r9, %r11 +; CHECK-NEXT: leaq (%r10,%rcx), %r9 +; CHECK-NEXT: addq %rax, %r9 ; CHECK-NEXT: addq %rax, %r11 -; CHECK-NEXT: addq %r9, %rbx -; CHECK-NEXT: addq %r11, %rbx +; CHECK-NEXT: addq %r9, %r11 ; CHECK-NEXT: leaq (%rdi,%rsi), %r9 ; CHECK-NEXT: addq %r9, %r9 ; CHECK-NEXT: addq %r8, %r9 -; CHECK-NEXT: movq X(%rip), %r11 -; CHECK-NEXT: addq %rax, %rbx -; CHECK-NEXT: addq %rbx, %rdx +; CHECK-NEXT: addq %r11, %rdx ; CHECK-NEXT: addq %r8, %r9 -; CHECK-NEXT: addq %rbx, %r9 +; CHECK-NEXT: addq %r11, %r9 +; CHECK-NEXT: movq X(%rip), %r11 ; CHECK-NEXT: bswapq %r11 -; CHECK-NEXT: leaq (%rax,%r10), %rbx -; CHECK-NEXT: addq %rdx, %rbx ; CHECK-NEXT: addq %rcx, %r11 -; CHECK-NEXT: addq %rbx, %r11 +; CHECK-NEXT: leaq (%rax,%r10), %rcx +; CHECK-NEXT: addq %rdx, %rcx +; CHECK-NEXT: addq %rdx, %r11 +; CHECK-NEXT: addq %rcx, %r11 ; CHECK-NEXT: leaq (%r8,%rdi), %rcx ; CHECK-NEXT: addq %rcx, %rcx ; CHECK-NEXT: addq %r9, %rcx -; CHECK-NEXT: movq X(%rip), %rbx -; CHECK-NEXT: addq %rdx, %r11 ; CHECK-NEXT: addq %r11, %rsi ; CHECK-NEXT: addq %r9, %rcx ; CHECK-NEXT: addq %r11, %rcx -; CHECK-NEXT: bswapq %rbx -; CHECK-NEXT: leaq (%rdx,%rax), %r11 +; CHECK-NEXT: movq X(%rip), %r11 +; CHECK-NEXT: bswapq %r11 +; CHECK-NEXT: addq %r10, %r11 +; CHECK-NEXT: leaq (%rdx,%rax), %r10 +; CHECK-NEXT: addq %rsi, %r10 ; CHECK-NEXT: addq %rsi, %r11 -; CHECK-NEXT: addq %r10, %rbx -; CHECK-NEXT: addq %r11, %rbx +; CHECK-NEXT: addq %r10, %r11 ; CHECK-NEXT: leaq (%r9,%r8), %r10 ; CHECK-NEXT: addq %r10, %r10 ; CHECK-NEXT: addq %rcx, %r10 -; CHECK-NEXT: movq X(%rip), %r14 -; CHECK-NEXT: addq %rsi, %rbx -; CHECK-NEXT: addq %rbx, %rdi +; CHECK-NEXT: addq %r11, %rdi ; CHECK-NEXT: addq %rcx, %r10 -; CHECK-NEXT: addq %rbx, %r10 -; CHECK-NEXT: bswapq %r14 -; CHECK-NEXT: leaq (%rsi,%rdx), %r11 -; CHECK-NEXT: addq %rdi, %r11 -; CHECK-NEXT: addq %rax, %r14 -; CHECK-NEXT: addq %r11, %r14 +; CHECK-NEXT: addq %r11, %r10 +; CHECK-NEXT: movq X(%rip), %rbx +; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: leaq (%rsi,%rdx), %rax +; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: addq %rdi, %rbx +; CHECK-NEXT: addq %rax, %rbx ; CHECK-NEXT: leaq (%rcx,%r9), %r11 ; CHECK-NEXT: addq %r11, %r11 ; CHECK-NEXT: addq %r10, %r11 -; CHECK-NEXT: movq X(%rip), %rax -; CHECK-NEXT: addq %rdi, %r14 -; CHECK-NEXT: addq %r14, %r8 +; CHECK-NEXT: addq %rbx, %r8 ; CHECK-NEXT: addq %r10, %r11 -; CHECK-NEXT: addq %r14, %r11 +; CHECK-NEXT: addq %rbx, %r11 +; CHECK-NEXT: movq X(%rip), %rax ; CHECK-NEXT: bswapq %rax -; CHECK-NEXT: leaq (%rdi,%rsi), %rbx -; CHECK-NEXT: addq %r8, %rbx ; CHECK-NEXT: addq %rdx, %rax -; CHECK-NEXT: addq %rbx, %rax +; CHECK-NEXT: leaq (%rdi,%rsi), %rdx +; CHECK-NEXT: addq %r8, %rdx +; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: leaq (%r10,%rcx), %rdx ; CHECK-NEXT: addq %rdx, %rdx ; CHECK-NEXT: addq %r11, %rdx -; CHECK-NEXT: movq X(%rip), %rbx -; CHECK-NEXT: addq %r8, %rax ; CHECK-NEXT: addq %rax, %r9 ; CHECK-NEXT: addq %r11, %rdx ; CHECK-NEXT: addq %rax, %rdx +; CHECK-NEXT: movq X(%rip), %rbx ; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: addq %rsi, %rbx ; CHECK-NEXT: leaq (%r8,%rdi), %rax ; CHECK-NEXT: addq %r9, %rax -; CHECK-NEXT: addq %rsi, %rbx +; CHECK-NEXT: addq %r9, %rbx ; CHECK-NEXT: addq %rax, %rbx ; CHECK-NEXT: leaq (%r11,%r10), %rax ; CHECK-NEXT: addq %rax, %rax ; CHECK-NEXT: addq %rdx, %rax -; CHECK-NEXT: movq X(%rip), %r14 -; CHECK-NEXT: addq %r9, %rbx ; CHECK-NEXT: addq %rbx, %rcx ; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rbx, %rax -; CHECK-NEXT: bswapq %r14 +; CHECK-NEXT: movq X(%rip), %rbx +; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: addq %rdi, %rbx ; CHECK-NEXT: leaq (%r9,%r8), %rsi ; CHECK-NEXT: addq %rcx, %rsi -; CHECK-NEXT: addq %rdi, %r14 -; CHECK-NEXT: addq %rsi, %r14 +; CHECK-NEXT: addq %rcx, %rbx +; CHECK-NEXT: addq %rsi, %rbx ; CHECK-NEXT: leaq (%rdx,%r11), %rsi ; CHECK-NEXT: addq %rsi, %rsi ; CHECK-NEXT: addq %rax, %rsi -; CHECK-NEXT: movq X(%rip), %rdi -; CHECK-NEXT: addq %rcx, %r14 -; CHECK-NEXT: addq %r14, %r10 +; CHECK-NEXT: addq %rbx, %r10 ; CHECK-NEXT: addq %rax, %rsi -; CHECK-NEXT: addq %r14, %rsi -; CHECK-NEXT: bswapq %rdi -; CHECK-NEXT: leaq (%rcx,%r9), %rbx +; CHECK-NEXT: addq %rbx, %rsi +; CHECK-NEXT: movq X(%rip), %rbx +; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: addq %r8, %rbx +; CHECK-NEXT: leaq (%rcx,%r9), %rdi +; CHECK-NEXT: addq %r10, %rdi ; CHECK-NEXT: addq %r10, %rbx -; CHECK-NEXT: addq %r8, %rdi +; CHECK-NEXT: addq %rdi, %rbx +; CHECK-NEXT: leaq (%rax,%rdx), %rdi +; CHECK-NEXT: addq %rdi, %rdi +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: addq %rbx, %r11 +; CHECK-NEXT: addq %rsi, %rdi ; CHECK-NEXT: addq %rbx, %rdi -; CHECK-NEXT: leaq (%rax,%rdx), %r8 -; CHECK-NEXT: addq %r8, %r8 -; CHECK-NEXT: addq %rsi, %r8 -; CHECK-NEXT: addq %r10, %rdi -; CHECK-NEXT: addq %rdi, %r11 -; CHECK-NEXT: addq %rsi, %r8 -; CHECK-NEXT: addq %rdi, %r8 -; CHECK-NEXT: movq X(%rip), %rdi -; CHECK-NEXT: bswapq %rdi -; CHECK-NEXT: addq %r9, %rdi +; CHECK-NEXT: movq X(%rip), %r8 +; CHECK-NEXT: bswapq %r8 +; CHECK-NEXT: addq %r9, %r8 ; CHECK-NEXT: leaq (%r10,%rcx), %r9 ; CHECK-NEXT: addq %r11, %r9 -; CHECK-NEXT: addq %r9, %rdi +; CHECK-NEXT: addq %r11, %r8 +; CHECK-NEXT: addq %r9, %r8 ; CHECK-NEXT: addq %rax, %rsi ; CHECK-NEXT: addq %rsi, %rsi -; CHECK-NEXT: addq %r8, %rsi -; CHECK-NEXT: addq %r8, %rsi -; CHECK-NEXT: addq %r11, %rdi -; CHECK-NEXT: addq %rdi, %rdx ; CHECK-NEXT: addq %rdi, %rsi +; CHECK-NEXT: addq %rdi, %rsi +; CHECK-NEXT: addq %r8, %rdx +; CHECK-NEXT: addq %r8, %rsi ; CHECK-NEXT: movq X(%rip), %rax ; CHECK-NEXT: bswapq %rax ; CHECK-NEXT: addq %r10, %r11 ; CHECK-NEXT: movq %rax, X(%rip) ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: addq %rdx, %r11 -; CHECK-NEXT: addq %r11, %rax ; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: addq %r11, %rax ; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r14 diff --git a/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll b/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll index 38b7a2cdda64e8..261e8f873b2fe2 100644 --- a/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll +++ b/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll @@ -28,8 +28,8 @@ define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind { ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; CHECK-NEXT: movl %ebp, %esi ; CHECK-NEXT: calll callee@PLT -; CHECK-NEXT: addl %eax, %ebx ; CHECK-NEXT: addl %ebp, %ebx +; CHECK-NEXT: addl %eax, %ebx ; CHECK-NEXT: movl %ebx, %esi ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll index 17eda596601934..30e45a4583624e 100644 --- a/llvm/test/CodeGen/X86/add-sub-bool.ll +++ b/llvm/test/CodeGen/X86/add-sub-bool.ll @@ -68,11 +68,11 @@ define i32 @test_i32_add_add_idx0(i32 %x, i32 %y, i32 %z) nounwind { ; ; X64-LABEL: test_i32_add_add_idx0: ; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edx killed $edx def $rdx ; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: leal (%rdi,%rsi), %eax ; X64-NEXT: andl $1, %edx -; X64-NEXT: addl %edx, %eax +; X64-NEXT: leal (%rdx,%rdi), %eax +; X64-NEXT: addl %esi, %eax ; X64-NEXT: retq %add = add i32 %y, %x %mask = and i32 %z, 1 diff --git a/llvm/test/CodeGen/X86/alias-static-alloca.ll b/llvm/test/CodeGen/X86/alias-static-alloca.ll index f8d4ccce89f966..5df2906d512a45 100644 --- a/llvm/test/CodeGen/X86/alias-static-alloca.ll +++ b/llvm/test/CodeGen/X86/alias-static-alloca.ll @@ -7,15 +7,15 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-NEXT: # kill: def $esi killed $esi def $rsi -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movl %edi, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: leal (%rdi,%rsi), %eax -; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: leal (%rsi,%rdx), %eax ; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edi, %eax ; CHECK-NEXT: retq entry: %a0 = alloca i32 diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-x86_64.ll index d5d10565f24b81..82aff699cf297a 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-x86_64.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-x86_64.ll @@ -117,10 +117,10 @@ declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vcvtsd2usi %xmm0, %rax -; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: vcvtsd2usi %xmm0, %rcx +; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rdx ; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: retq @@ -136,10 +136,10 @@ declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2si64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vcvtsd2si %xmm0, %rax -; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: vcvtsd2si %xmm0, %rcx +; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rdx ; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: retq @@ -155,10 +155,10 @@ declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2usi64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vcvtss2usi %xmm0, %rax -; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: vcvtss2usi %xmm0, %rcx +; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rdx ; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: retq @@ -174,10 +174,10 @@ declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2si64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vcvtss2si %xmm0, %rax -; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: vcvtss2si %xmm0, %rcx +; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rdx ; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 8e0943a10537b1..58d4e744c8e5b0 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -934,10 +934,10 @@ declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2usi32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtsd2usi %xmm0, %eax -; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: vcvtsd2usi %xmm0, %ecx +; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %edx ; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ret{{[l|q]}} @@ -953,10 +953,10 @@ declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2si32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtsd2si %xmm0, %eax -; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: vcvtsd2si %xmm0, %ecx +; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %edx ; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ret{{[l|q]}} @@ -972,10 +972,10 @@ declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2usi32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtss2usi %xmm0, %eax -; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: vcvtss2usi %xmm0, %ecx +; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %edx ; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ret{{[l|q]}} @@ -991,10 +991,10 @@ declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2si32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtss2si %xmm0, %eax -; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: vcvtss2si %xmm0, %ecx +; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %edx ; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ret{{[l|q]}} diff --git a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll index ffe5ffb64af14d..412c30906cb5c8 100644 --- a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll +++ b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll @@ -35,32 +35,32 @@ define dso_local x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, ; ; WIN64-LABEL: test_argv64i1: ; WIN64: # %bb.0: +; WIN64-NEXT: addq %rdx, %rcx +; WIN64-NEXT: addq %rdi, %rcx +; WIN64-NEXT: addq %rsi, %rcx +; WIN64-NEXT: addq %r8, %rcx +; WIN64-NEXT: addq %r9, %rcx +; WIN64-NEXT: addq %r10, %rcx +; WIN64-NEXT: addq %r11, %rcx +; WIN64-NEXT: addq %r12, %rcx +; WIN64-NEXT: addq %r14, %rcx +; WIN64-NEXT: addq %r15, %rcx ; WIN64-NEXT: addq %rcx, %rax -; WIN64-NEXT: addq %rdx, %rax -; WIN64-NEXT: addq %rdi, %rax -; WIN64-NEXT: addq %rsi, %rax -; WIN64-NEXT: addq %r8, %rax -; WIN64-NEXT: addq %r9, %rax -; WIN64-NEXT: addq %r10, %rax -; WIN64-NEXT: addq %r11, %rax -; WIN64-NEXT: addq %r12, %rax -; WIN64-NEXT: addq %r14, %rax -; WIN64-NEXT: addq %r15, %rax ; WIN64-NEXT: addq {{[0-9]+}}(%rsp), %rax ; WIN64-NEXT: retq ; ; LINUXOSX64-LABEL: test_argv64i1: ; LINUXOSX64: # %bb.0: +; LINUXOSX64-NEXT: addq %rdx, %rcx +; LINUXOSX64-NEXT: addq %rdi, %rcx +; LINUXOSX64-NEXT: addq %rsi, %rcx +; LINUXOSX64-NEXT: addq %r8, %rcx +; LINUXOSX64-NEXT: addq %r9, %rcx +; LINUXOSX64-NEXT: addq %r12, %rcx +; LINUXOSX64-NEXT: addq %r13, %rcx +; LINUXOSX64-NEXT: addq %r14, %rcx +; LINUXOSX64-NEXT: addq %r15, %rcx ; LINUXOSX64-NEXT: addq %rcx, %rax -; LINUXOSX64-NEXT: addq %rdx, %rax -; LINUXOSX64-NEXT: addq %rdi, %rax -; LINUXOSX64-NEXT: addq %rsi, %rax -; LINUXOSX64-NEXT: addq %r8, %rax -; LINUXOSX64-NEXT: addq %r9, %rax -; LINUXOSX64-NEXT: addq %r12, %rax -; LINUXOSX64-NEXT: addq %r13, %rax -; LINUXOSX64-NEXT: addq %r14, %rax -; LINUXOSX64-NEXT: addq %r15, %rax ; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax ; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax ; LINUXOSX64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll index 0caa8826e75c89..bb762469571d14 100644 --- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll +++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -939,7 +939,7 @@ define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 % ; X32: # %bb.0: ; X32-NEXT: pushl %ebp ; X32-NEXT: pushl %ebx -; X32-NEXT: subl $12, %esp +; X32-NEXT: subl $16, %esp ; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl %edi, %esi ; X32-NEXT: movl %edx, %ebx @@ -950,36 +950,37 @@ define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 % ; X32-NEXT: subl %esi, %ebx ; X32-NEXT: movl %edi, %eax ; X32-NEXT: subl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: movl %ebp, %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: imull %eax, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: subl {{[0-9]+}}(%esp), %eax -; X32-NEXT: imull %ebx, %eax -; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movl %esi, %edx +; X32-NEXT: subl {{[0-9]+}}(%esp), %edx +; X32-NEXT: imull %ebx, %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-NEXT: subl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: subl %ebp, %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: addl %eax, %ecx +; X32-NEXT: addl %edx, %ecx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: imull %ebp, %edi +; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: addl {{[0-9]+}}(%esp), %edx +; X32-NEXT: imull %edx, %edi ; X32-NEXT: addl {{[0-9]+}}(%esp), %esi ; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: addl %esi, %edi -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx -; X32-NEXT: imull %eax, %edx -; X32-NEXT: addl %edx, %edi +; X32-NEXT: addl {{[0-9]+}}(%esp), %eax +; X32-NEXT: imull %ebp, %eax +; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %eax, %edi ; X32-NEXT: addl %ecx, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: addl $12, %esp +; X32-NEXT: addl $16, %esp ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp ; X32-NEXT: retl @@ -1013,18 +1014,18 @@ define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 % ; WIN64-NEXT: # kill: def $r11d killed $r11d killed $r11 ; WIN64-NEXT: subl %r12d, %r11d ; WIN64-NEXT: imull %edx, %r11d -; WIN64-NEXT: addl %r9d, %r11d ; WIN64-NEXT: leal (%r14,%r15), %edx -; WIN64-NEXT: movl %r14d, %r9d -; WIN64-NEXT: subl %r15d, %r9d -; WIN64-NEXT: imull %esi, %r9d -; WIN64-NEXT: addl %r11d, %r9d +; WIN64-NEXT: # kill: def $r14d killed $r14d killed $r14 +; WIN64-NEXT: subl %r15d, %r14d +; WIN64-NEXT: imull %esi, %r14d +; WIN64-NEXT: addl %r11d, %r14d ; WIN64-NEXT: addl %ecx, %eax ; WIN64-NEXT: imull %r8d, %eax ; WIN64-NEXT: imull %ebx, %r10d -; WIN64-NEXT: addl %r10d, %eax ; WIN64-NEXT: imull %edi, %edx +; WIN64-NEXT: addl %r10d, %edx ; WIN64-NEXT: addl %edx, %eax +; WIN64-NEXT: addl %r14d, %eax ; WIN64-NEXT: addl %r9d, %eax ; WIN64-NEXT: popq %rbx ; WIN64-NEXT: retq @@ -1054,19 +1055,19 @@ define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 % ; LINUXOSX64-NEXT: leal (%r13,%r14), %r11d ; LINUXOSX64-NEXT: movl %r13d, %r12d ; LINUXOSX64-NEXT: subl %r14d, %r12d +; LINUXOSX64-NEXT: movl {{[0-9]+}}(%rsp), %r14d ; LINUXOSX64-NEXT: imull %edx, %r12d -; LINUXOSX64-NEXT: movl {{[0-9]+}}(%rsp), %edx -; LINUXOSX64-NEXT: addl %r9d, %r12d -; LINUXOSX64-NEXT: movl %r15d, %r9d -; LINUXOSX64-NEXT: subl %edx, %r9d -; LINUXOSX64-NEXT: imull %esi, %r9d -; LINUXOSX64-NEXT: addl %r12d, %r9d +; LINUXOSX64-NEXT: movl %r15d, %edx +; LINUXOSX64-NEXT: subl %r14d, %edx +; LINUXOSX64-NEXT: imull %esi, %edx +; LINUXOSX64-NEXT: addl %r12d, %edx ; LINUXOSX64-NEXT: addl %ecx, %eax ; LINUXOSX64-NEXT: imull %r8d, %eax ; LINUXOSX64-NEXT: imull %r10d, %r11d -; LINUXOSX64-NEXT: addl %r11d, %eax -; LINUXOSX64-NEXT: addl %r15d, %edx -; LINUXOSX64-NEXT: imull %edi, %edx +; LINUXOSX64-NEXT: addl %r15d, %r14d +; LINUXOSX64-NEXT: imull %edi, %r14d +; LINUXOSX64-NEXT: addl %r11d, %r14d +; LINUXOSX64-NEXT: addl %r14d, %eax ; LINUXOSX64-NEXT: addl %edx, %eax ; LINUXOSX64-NEXT: addl %r9d, %eax ; LINUXOSX64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index 4b39dbd2975580..b3148629dd5541 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -1905,19 +1905,19 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] ; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] +; X64-NEXT: addq %rdx, %rcx # encoding: [0x48,0x01,0xd1] ; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] -; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff] +; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] +; X64-NEXT: leaq -1(%rax,%rdx), %rax # encoding: [0x48,0x8d,0x44,0x10,0xff] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) @@ -1987,23 +1987,23 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwin ; X64: # %bb.0: ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] -; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02] +; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] -; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] +; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] +; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] ; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05] ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] -; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] +; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) @@ -2078,19 +2078,19 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] ; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] +; X64-NEXT: addq %rdx, %rcx # encoding: [0x48,0x01,0xd1] ; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06] -; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff] +; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] +; X64-NEXT: leaq -1(%rax,%rdx), %rax # encoding: [0x48,0x8d,0x44,0x10,0xff] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) @@ -2160,23 +2160,23 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; X64: # %bb.0: ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] -; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02] +; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] -; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] +; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] +; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] ; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05] ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] -; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] +; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) @@ -2206,19 +2206,19 @@ define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] ; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X86-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1] ; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] -; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff] +; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X86-NEXT: leal -1(%eax,%edx), %eax # encoding: [0x8d,0x44,0x10,0xff] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -2228,19 +2228,19 @@ define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] ; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X64-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1] ; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] -; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff] +; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X64-NEXT: leal -1(%rax,%rdx), %eax # encoding: [0x8d,0x44,0x10,0xff] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) @@ -2268,23 +2268,23 @@ define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounw ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] -; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] +; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] +; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] +; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] +; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6] ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] -; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] +; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] ; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05] ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] -; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6] +; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6] ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X86-NEXT: popl %esi # encoding: [0x5e] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] @@ -2293,23 +2293,23 @@ define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounw ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] -; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] +; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] -; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05] ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] +; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) @@ -2339,19 +2339,19 @@ define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] ; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X86-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1] ; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06] -; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff] +; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X86-NEXT: leal -1(%eax,%edx), %eax # encoding: [0x8d,0x44,0x10,0xff] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -2361,19 +2361,19 @@ define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] ; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X64-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1] ; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06] -; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff] +; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X64-NEXT: leal -1(%rax,%rdx), %eax # encoding: [0x8d,0x44,0x10,0xff] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) @@ -2401,23 +2401,23 @@ define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) noun ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] -; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] +; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] +; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] +; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] +; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6] ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] -; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] +; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] ; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05] ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] -; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6] +; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6] ; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X86-NEXT: popl %esi # encoding: [0x5e] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] @@ -2426,23 +2426,23 @@ define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) noun ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] -; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] +; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] -; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05] ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] +; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll index 587d7929e58483..0ee08a43f2657b 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll @@ -124,10 +124,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: pushl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: calll __divdi3 @@ -136,10 +136,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %ecx, 4(%edx) ; X86-NEXT: movl %eax, (%edx) -; X86-NEXT: imull %eax, %ebp -; X86-NEXT: mull %ebx -; X86-NEXT: addl %ebp, %edx -; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: imull %ebp, %ecx +; X86-NEXT: addl %ebx, %ecx ; X86-NEXT: addl %edx, %ecx ; X86-NEXT: subl %eax, %esi ; X86-NEXT: sbbl %ecx, %edi @@ -192,8 +192,8 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X64-NEXT: movq %rax, (%rbx) ; X64-NEXT: imulq %rax, %r14 ; X64-NEXT: mulq %r15 -; X64-NEXT: addq %r14, %rdx ; X64-NEXT: imulq %r15, %rcx +; X64-NEXT: addq %r14, %rcx ; X64-NEXT: addq %rdx, %rcx ; X64-NEXT: subq %rax, %r13 ; X64-NEXT: sbbq %rcx, %r12 diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll index 6f67a12f67938e..86dd5f32ea6f88 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll @@ -124,10 +124,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: pushl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: calll __udivdi3 @@ -136,10 +136,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %ecx, 4(%edx) ; X86-NEXT: movl %eax, (%edx) -; X86-NEXT: imull %eax, %ebp -; X86-NEXT: mull %ebx -; X86-NEXT: addl %ebp, %edx -; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: mull %ebp +; X86-NEXT: imull %ebp, %ecx +; X86-NEXT: addl %ebx, %ecx ; X86-NEXT: addl %edx, %ecx ; X86-NEXT: subl %eax, %esi ; X86-NEXT: sbbl %ecx, %edi @@ -192,8 +192,8 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X64-NEXT: movq %rax, (%rbx) ; X64-NEXT: imulq %rax, %r14 ; X64-NEXT: mulq %r15 -; X64-NEXT: addq %r14, %rdx ; X64-NEXT: imulq %r15, %rcx +; X64-NEXT: addq %r14, %rcx ; X64-NEXT: addq %rdx, %rcx ; X64-NEXT: subq %rax, %r13 ; X64-NEXT: sbbq %rcx, %r12 diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll index 80944d29560cb4..cd081051467c18 100644 --- a/llvm/test/CodeGen/X86/divide-by-constant.ll +++ b/llvm/test/CodeGen/X86/divide-by-constant.ll @@ -793,9 +793,9 @@ define i64 @udiv_i64_3(i64 %x) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull $-1431655765, %edi, %ecx # imm = 0xAAAAAAAB -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: imull $-1431655765, %edi, %esi # imm = 0xAAAAAAAB +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx @@ -836,9 +836,9 @@ define i64 @udiv_i64_5(i64 %x) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: imull $-858993460, %ecx, %ecx # imm = 0xCCCCCCCC -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull $-858993459, %edi, %ecx # imm = 0xCCCCCCCD -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: imull $-858993459, %edi, %esi # imm = 0xCCCCCCCD +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx @@ -880,9 +880,9 @@ define i64 @udiv_i64_15(i64 %x) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edx ; X32-NEXT: imull $-286331154, %ecx, %ecx # imm = 0xEEEEEEEE -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull $-286331153, %edi, %ecx # imm = 0xEEEEEEEF -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: imull $-286331153, %edi, %esi # imm = 0xEEEEEEEF +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: retl @@ -924,9 +924,9 @@ define i64 @udiv_i64_17(i64 %x) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: imull $-252645136, %ecx, %ecx # imm = 0xF0F0F0F0 -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull $-252645135, %edi, %ecx # imm = 0xF0F0F0F1 -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: imull $-252645135, %edi, %esi # imm = 0xF0F0F0F1 +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx @@ -969,9 +969,9 @@ define i64 @udiv_i64_255(i64 %x) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edx ; X32-NEXT: imull $-16843010, %ecx, %ecx # imm = 0xFEFEFEFE -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull $-16843009, %esi, %ecx # imm = 0xFEFEFEFF -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: imull $-16843009, %esi, %esi # imm = 0xFEFEFEFF +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: retl ; @@ -1012,9 +1012,9 @@ define i64 @udiv_i64_257(i64 %x) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: imull $-16711936, %ecx, %ecx # imm = 0xFF00FF00 -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull $-16711935, %edi, %ecx # imm = 0xFF00FF01 -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: imull $-16711935, %edi, %esi # imm = 0xFF00FF01 +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx @@ -1148,9 +1148,9 @@ define i64 @udiv_i64_12(i64 %x) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull $-1431655765, %edi, %ecx # imm = 0xAAAAAAAB -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: imull $-1431655765, %edi, %esi # imm = 0xAAAAAAAB +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/divmod128.ll b/llvm/test/CodeGen/X86/divmod128.ll index 2e13776715e5be..c80e8d105b3428 100644 --- a/llvm/test/CodeGen/X86/divmod128.ll +++ b/llvm/test/CodeGen/X86/divmod128.ll @@ -482,8 +482,8 @@ define i128 @udiv_i128_3(i128 %x) nounwind { ; X86-64-NEXT: imulq %rdi, %rcx ; X86-64-NEXT: movq %rdi, %rax ; X86-64-NEXT: mulq %r8 -; X86-64-NEXT: addq %rcx, %rdx ; X86-64-NEXT: imulq %rsi, %r8 +; X86-64-NEXT: addq %rcx, %r8 ; X86-64-NEXT: addq %r8, %rdx ; X86-64-NEXT: retq ; @@ -505,8 +505,8 @@ define i128 @udiv_i128_3(i128 %x) nounwind { ; WIN64-NEXT: imulq %rcx, %r9 ; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: mulq %r10 -; WIN64-NEXT: addq %r9, %rdx ; WIN64-NEXT: imulq %r10, %r8 +; WIN64-NEXT: addq %r9, %r8 ; WIN64-NEXT: addq %r8, %rdx ; WIN64-NEXT: retq entry: @@ -532,8 +532,8 @@ define i128 @udiv_i128_5(i128 %x) nounwind { ; X86-64-NEXT: imulq %rdi, %rcx ; X86-64-NEXT: movq %rdi, %rax ; X86-64-NEXT: mulq %r8 -; X86-64-NEXT: addq %rcx, %rdx ; X86-64-NEXT: imulq %rsi, %r8 +; X86-64-NEXT: addq %rcx, %r8 ; X86-64-NEXT: addq %r8, %rdx ; X86-64-NEXT: retq ; @@ -555,8 +555,8 @@ define i128 @udiv_i128_5(i128 %x) nounwind { ; WIN64-NEXT: imulq %rcx, %r9 ; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: mulq %r10 -; WIN64-NEXT: addq %r9, %rdx ; WIN64-NEXT: imulq %r10, %r8 +; WIN64-NEXT: addq %r9, %r8 ; WIN64-NEXT: addq %r8, %rdx ; WIN64-NEXT: retq entry: @@ -584,8 +584,8 @@ define i128 @udiv_i128_15(i128 %x) nounwind { ; X86-64-NEXT: movabsq $-1229782938247303441, %r8 # imm = 0xEEEEEEEEEEEEEEEF ; X86-64-NEXT: movq %rdi, %rax ; X86-64-NEXT: mulq %r8 -; X86-64-NEXT: addq %rcx, %rdx ; X86-64-NEXT: imulq %rsi, %r8 +; X86-64-NEXT: addq %rcx, %r8 ; X86-64-NEXT: addq %r8, %rdx ; X86-64-NEXT: retq ; @@ -609,8 +609,8 @@ define i128 @udiv_i128_15(i128 %x) nounwind { ; WIN64-NEXT: movabsq $-1229782938247303441, %r10 # imm = 0xEEEEEEEEEEEEEEEF ; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: mulq %r10 -; WIN64-NEXT: addq %r9, %rdx ; WIN64-NEXT: imulq %r10, %r8 +; WIN64-NEXT: addq %r9, %r8 ; WIN64-NEXT: addq %r8, %rdx ; WIN64-NEXT: retq entry: @@ -638,8 +638,8 @@ define i128 @udiv_i128_17(i128 %x) nounwind { ; X86-64-NEXT: imulq %rdi, %rcx ; X86-64-NEXT: movq %rdi, %rax ; X86-64-NEXT: mulq %r8 -; X86-64-NEXT: addq %rcx, %rdx ; X86-64-NEXT: imulq %rsi, %r8 +; X86-64-NEXT: addq %rcx, %r8 ; X86-64-NEXT: addq %r8, %rdx ; X86-64-NEXT: retq ; @@ -663,8 +663,8 @@ define i128 @udiv_i128_17(i128 %x) nounwind { ; WIN64-NEXT: imulq %rcx, %r9 ; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: mulq %r10 -; WIN64-NEXT: addq %r9, %rdx ; WIN64-NEXT: imulq %r10, %r8 +; WIN64-NEXT: addq %r9, %r8 ; WIN64-NEXT: addq %r8, %rdx ; WIN64-NEXT: retq entry: @@ -694,8 +694,8 @@ define i128 @udiv_i128_255(i128 %x) nounwind { ; X86-64-NEXT: movabsq $-72340172838076673, %r8 # imm = 0xFEFEFEFEFEFEFEFF ; X86-64-NEXT: movq %rdi, %rax ; X86-64-NEXT: mulq %r8 -; X86-64-NEXT: addq %rcx, %rdx ; X86-64-NEXT: imulq %rsi, %r8 +; X86-64-NEXT: addq %rcx, %r8 ; X86-64-NEXT: addq %r8, %rdx ; X86-64-NEXT: retq ; @@ -721,8 +721,8 @@ define i128 @udiv_i128_255(i128 %x) nounwind { ; WIN64-NEXT: movabsq $-72340172838076673, %r10 # imm = 0xFEFEFEFEFEFEFEFF ; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: mulq %r10 -; WIN64-NEXT: addq %r9, %rdx ; WIN64-NEXT: imulq %r10, %r8 +; WIN64-NEXT: addq %r9, %r8 ; WIN64-NEXT: addq %r8, %rdx ; WIN64-NEXT: retq entry: @@ -750,8 +750,8 @@ define i128 @udiv_i128_257(i128 %x) nounwind { ; X86-64-NEXT: imulq %rdi, %rcx ; X86-64-NEXT: movq %rdi, %rax ; X86-64-NEXT: mulq %r8 -; X86-64-NEXT: addq %rcx, %rdx ; X86-64-NEXT: imulq %rsi, %r8 +; X86-64-NEXT: addq %rcx, %r8 ; X86-64-NEXT: addq %r8, %rdx ; X86-64-NEXT: retq ; @@ -775,8 +775,8 @@ define i128 @udiv_i128_257(i128 %x) nounwind { ; WIN64-NEXT: imulq %rcx, %r9 ; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: mulq %r10 -; WIN64-NEXT: addq %r9, %rdx ; WIN64-NEXT: imulq %r10, %r8 +; WIN64-NEXT: addq %r9, %r8 ; WIN64-NEXT: addq %r8, %rdx ; WIN64-NEXT: retq entry: @@ -806,8 +806,8 @@ define i128 @udiv_i128_65535(i128 %x) nounwind { ; X86-64-NEXT: movabsq $-281479271743489, %r8 # imm = 0xFFFEFFFEFFFEFFFF ; X86-64-NEXT: movq %rdi, %rax ; X86-64-NEXT: mulq %r8 -; X86-64-NEXT: addq %rcx, %rdx ; X86-64-NEXT: imulq %rsi, %r8 +; X86-64-NEXT: addq %rcx, %r8 ; X86-64-NEXT: addq %r8, %rdx ; X86-64-NEXT: retq ; @@ -833,8 +833,8 @@ define i128 @udiv_i128_65535(i128 %x) nounwind { ; WIN64-NEXT: movabsq $-281479271743489, %r10 # imm = 0xFFFEFFFEFFFEFFFF ; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: mulq %r10 -; WIN64-NEXT: addq %r9, %rdx ; WIN64-NEXT: imulq %r10, %r8 +; WIN64-NEXT: addq %r9, %r8 ; WIN64-NEXT: addq %r8, %rdx ; WIN64-NEXT: retq entry: @@ -862,8 +862,8 @@ define i128 @udiv_i128_65537(i128 %x) nounwind { ; X86-64-NEXT: imulq %rdi, %rcx ; X86-64-NEXT: movq %rdi, %rax ; X86-64-NEXT: mulq %r8 -; X86-64-NEXT: addq %rcx, %rdx ; X86-64-NEXT: imulq %rsi, %r8 +; X86-64-NEXT: addq %rcx, %r8 ; X86-64-NEXT: addq %r8, %rdx ; X86-64-NEXT: retq ; @@ -887,8 +887,8 @@ define i128 @udiv_i128_65537(i128 %x) nounwind { ; WIN64-NEXT: imulq %rcx, %r9 ; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: mulq %r10 -; WIN64-NEXT: addq %r9, %rdx ; WIN64-NEXT: imulq %r10, %r8 +; WIN64-NEXT: addq %r9, %r8 ; WIN64-NEXT: addq %r8, %rdx ; WIN64-NEXT: retq entry: @@ -916,8 +916,8 @@ define i128 @udiv_i128_12(i128 %x) nounwind { ; X86-64-NEXT: imulq %rdi, %rcx ; X86-64-NEXT: movq %rdi, %rax ; X86-64-NEXT: mulq %r8 -; X86-64-NEXT: addq %rcx, %rdx ; X86-64-NEXT: imulq %rsi, %r8 +; X86-64-NEXT: addq %rcx, %r8 ; X86-64-NEXT: addq %r8, %rdx ; X86-64-NEXT: retq ; @@ -941,8 +941,8 @@ define i128 @udiv_i128_12(i128 %x) nounwind { ; WIN64-NEXT: imulq %rcx, %r9 ; WIN64-NEXT: movq %rcx, %rax ; WIN64-NEXT: mulq %r10 -; WIN64-NEXT: addq %r9, %rdx ; WIN64-NEXT: imulq %r10, %r8 +; WIN64-NEXT: addq %r9, %r8 ; WIN64-NEXT: addq %r8, %rdx ; WIN64-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/fold-add.ll b/llvm/test/CodeGen/X86/fold-add.ll index a4dc03471fc301..26396c3fc66a26 100644 --- a/llvm/test/CodeGen/X86/fold-add.ll +++ b/llvm/test/CodeGen/X86/fold-add.ll @@ -97,10 +97,10 @@ define dso_local i64 @neg_0x80000001() #0 { ; MSTATIC-NEXT: movabsq $-2147483649, %rcx ; MSTATIC-NEXT: movabsq $foo, %rax ; MSTATIC-NEXT: addq %rcx, %rax -; MPIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax -; MPIC-NEXT: movabsq $foo@GOTOFF, %rcx -; MPIC-NEXT: addq %rax, %rcx +; MPIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rcx +; MPIC-NEXT: movabsq $foo@GOTOFF, %rdx ; MPIC-NEXT: movabsq $-2147483649, %rax +; MPIC-NEXT: addq %rdx, %rax ; MPIC-NEXT: addq %rcx, %rax entry: ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2147483649) diff --git a/llvm/test/CodeGen/X86/fold-tied-op.ll b/llvm/test/CodeGen/X86/fold-tied-op.ll index a8636a3496dc43..a08133bb7af483 100644 --- a/llvm/test/CodeGen/X86/fold-tied-op.ll +++ b/llvm/test/CodeGen/X86/fold-tied-op.ll @@ -24,87 +24,85 @@ define i64 @fn1() #0 { ; CHECK-NEXT: .cfi_offset %esi, -20 ; CHECK-NEXT: .cfi_offset %edi, -16 ; CHECK-NEXT: .cfi_offset %ebx, -12 -; CHECK-NEXT: movl $-1028477379, %ebx # imm = 0xC2B2AE3D -; CHECK-NEXT: movl $668265295, %ecx # imm = 0x27D4EB4F -; CHECK-NEXT: movl a, %edi -; CHECK-NEXT: cmpl $0, (%edi) +; CHECK-NEXT: movl $-1028477379, %edi # imm = 0xC2B2AE3D +; CHECK-NEXT: movl $668265295, %ebx # imm = 0x27D4EB4F +; CHECK-NEXT: movl a, %eax +; CHECK-NEXT: cmpl $0, (%eax) ; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then -; CHECK-NEXT: movl 8(%edi), %esi -; CHECK-NEXT: movl 12(%edi), %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: shldl $1, %esi, %edx -; CHECK-NEXT: orl %eax, %edx -; CHECK-NEXT: leal (%esi,%esi), %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl 16(%edi), %ebx -; CHECK-NEXT: movl 20(%edi), %esi -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: shldl $2, %ebx, %eax -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: movl %esi, %ebx -; CHECK-NEXT: shldl $31, %eax, %ebx -; CHECK-NEXT: shll $2, %eax -; CHECK-NEXT: orl %ebx, %eax +; CHECK-NEXT: movl 8(%eax), %edi +; CHECK-NEXT: movl 12(%eax), %esi +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: shldl $1, %edi, %edx +; CHECK-NEXT: orl %esi, %edx +; CHECK-NEXT: leal (%edi,%edi), %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl 16(%eax), %ecx +; CHECK-NEXT: movl 20(%eax), %esi +; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: shldl $2, %ecx, %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: shldl $31, %ecx, %edi +; CHECK-NEXT: shll $2, %ecx +; CHECK-NEXT: orl %edi, %ecx ; CHECK-NEXT: shrl %esi ; CHECK-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: adcl %edx, %esi -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl 24(%edi), %eax +; CHECK-NEXT: movl 28(%eax), %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl 24(%eax), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl $-1028477379, %ebx # imm = 0xC2B2AE3D +; CHECK-NEXT: movl $-1028477379, %ecx # imm = 0xC2B2AE3D +; CHECK-NEXT: imull %eax, %ecx +; CHECK-NEXT: mull %ebx +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; CHECK-NEXT: imull %eax, %ebx -; CHECK-NEXT: mull %ecx -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: addl %ebx, %edx -; CHECK-NEXT: movl 28(%edi), %edi -; CHECK-NEXT: imull %edi, %ecx -; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: addl %ecx, %ebx +; CHECK-NEXT: addl %edx, %ebx +; CHECK-NEXT: imull $1336530590, %eax, %ecx # imm = 0x4FA9D69E +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: imull $-2056954758, %edx, %eax # imm = 0x85655C7A +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: movl $1336530590, %edx # imm = 0x4FA9D69E -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; CHECK-NEXT: movl %ebx, %eax ; CHECK-NEXT: mull %edx -; CHECK-NEXT: imull $-2056954758, %ebx, %ebx # imm = 0x85655C7A -; CHECK-NEXT: addl %edx, %ebx -; CHECK-NEXT: imull $1336530590, %edi, %edx # imm = 0x4FA9D69E -; CHECK-NEXT: addl %ebx, %edx -; CHECK-NEXT: shrdl $3, %ecx, %esi -; CHECK-NEXT: sarl $3, %ecx -; CHECK-NEXT: orl %edx, %ecx -; CHECK-NEXT: orl %eax, %esi -; CHECK-NEXT: movl $-66860409, %ebx # imm = 0xFC03CA87 -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: mull %ebx -; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: imull $326129324, %esi, %eax # imm = 0x137056AC -; CHECK-NEXT: addl %edx, %eax -; CHECK-NEXT: imull $-66860409, %ecx, %ecx # imm = 0xFC03CA87 +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: shrdl $3, %ebx, %edi +; CHECK-NEXT: sarl $3, %ebx +; CHECK-NEXT: orl %ecx, %ebx +; CHECK-NEXT: orl %eax, %edi +; CHECK-NEXT: imull $326129324, %edi, %eax # imm = 0x137056AC +; CHECK-NEXT: imull $-66860409, %ebx, %ecx # imm = 0xFC03CA87 ; CHECK-NEXT: addl %eax, %ecx -; CHECK-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; CHECK-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; CHECK-NEXT: movl %edi, b +; CHECK-NEXT: movl $-66860409, %ebx # imm = 0xFC03CA87 ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: mull %ebx -; CHECK-NEXT: imull $326129324, %edi, %esi # imm = 0x137056AC -; CHECK-NEXT: addl %edx, %esi +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; CHECK-NEXT: movl %ecx, b+4 +; CHECK-NEXT: imull $326129324, %eax, %edx # imm = 0x137056AC ; CHECK-NEXT: imull $-66860409, %ecx, %ecx # imm = 0xFC03CA87 +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: movl %eax, b +; CHECK-NEXT: mull %ebx ; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .LBB0_2: # %if.else -; CHECK-NEXT: xorl b+4, %ebx -; CHECK-NEXT: xorl b, %ecx -; CHECK-NEXT: movl $1419758215, %edx # imm = 0x549FCA87 -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: mull %edx -; CHECK-NEXT: imull $93298681, %ecx, %esi # imm = 0x58F9FF9 -; CHECK-NEXT: addl %edx, %esi -; CHECK-NEXT: imull $1419758215, %ebx, %ecx # imm = 0x549FCA87 -; CHECK-NEXT: .LBB0_3: # %if.end +; CHECK-NEXT: xorl b+4, %edi +; CHECK-NEXT: xorl b, %ebx +; CHECK-NEXT: movl $1419758215, %ecx # imm = 0x549FCA87 +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: mull %ecx +; CHECK-NEXT: imull $93298681, %ebx, %esi # imm = 0x58F9FF9 +; CHECK-NEXT: imull $1419758215, %edi, %ecx # imm = 0x549FCA87 ; CHECK-NEXT: addl %esi, %ecx +; CHECK-NEXT: .LBB0_3: # %if.end +; CHECK-NEXT: addl %edx, %ecx ; CHECK-NEXT: addl $-1028477341, %eax # imm = 0xC2B2AE63 ; CHECK-NEXT: adcl $-2048144777, %ecx # imm = 0x85EBCA77 ; CHECK-NEXT: movl %eax, b diff --git a/llvm/test/CodeGen/X86/h-registers-1.ll b/llvm/test/CodeGen/X86/h-registers-1.ll index 07d85d260a37a7..7ed6ddf267a8ee 100644 --- a/llvm/test/CodeGen/X86/h-registers-1.ll +++ b/llvm/test/CodeGen/X86/h-registers-1.ll @@ -30,11 +30,11 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d ; CHECK-NEXT: addq %rdi, %rsi ; CHECK-NEXT: addq %rbp, %rdx -; CHECK-NEXT: addq %rsi, %rdx ; CHECK-NEXT: addq %rbx, %rcx ; CHECK-NEXT: addq %r8, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: popq %rbp @@ -63,11 +63,11 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) ; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %r8d ; GNUX32-NEXT: addq %rdi, %rsi ; GNUX32-NEXT: addq %rbp, %rdx -; GNUX32-NEXT: addq %rsi, %rdx ; GNUX32-NEXT: addq %rbx, %rcx ; GNUX32-NEXT: addq %r8, %rax ; GNUX32-NEXT: addq %rcx, %rax ; GNUX32-NEXT: addq %rdx, %rax +; GNUX32-NEXT: addq %rsi, %rax ; GNUX32-NEXT: popq %rbx ; GNUX32-NEXT: .cfi_def_cfa_offset 16 ; GNUX32-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/hipe-cc.ll b/llvm/test/CodeGen/X86/hipe-cc.ll index 83393c8c128f7a..fd27f974253713 100644 --- a/llvm/test/CodeGen/X86/hipe-cc.ll +++ b/llvm/test/CodeGen/X86/hipe-cc.ll @@ -21,7 +21,7 @@ entry: define cc 11 {i32, i32, i32} @addfour(i32 %hp, i32 %p, i32 %x, i32 %y, i32 %z) nounwind { entry: - ; CHECK: addl %edx, %eax + ; CHECK: addl %edx, %ecx ; CHECK-NEXT: addl %ecx, %eax %0 = add i32 %x, %y %1 = add i32 %0, %z diff --git a/llvm/test/CodeGen/X86/hipe-cc64.ll b/llvm/test/CodeGen/X86/hipe-cc64.ll index 29741ae76db469..fec36996501247 100644 --- a/llvm/test/CodeGen/X86/hipe-cc64.ll +++ b/llvm/test/CodeGen/X86/hipe-cc64.ll @@ -23,9 +23,9 @@ entry: define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind { entry: - ; CHECK: leaq (%rsi,%rdx), %rax - ; CHECK-NEXT: addq %rcx, %rax + ; CHECK: leaq (%rdx,%rcx), %rax ; CHECK-NEXT: addq %r8, %rax + ; CHECK-NEXT: addq %rsi, %rax %0 = add i64 %x, %y %1 = add i64 %0, %z %2 = add i64 %1, %w diff --git a/llvm/test/CodeGen/X86/imul.ll b/llvm/test/CodeGen/X86/imul.ll index 9131688c4efcc1..94d786a65ffa8e 100644 --- a/llvm/test/CodeGen/X86/imul.ll +++ b/llvm/test/CodeGen/X86/imul.ll @@ -450,13 +450,18 @@ define i64 @test6(i64 %a) { ; ; X86-LABEL: test6: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shll $5, %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: shll $5, %esi ; X86-NEXT: movl $33, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %esi, %edx ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl entry: %tmp3 = mul i64 %a, 33 diff --git a/llvm/test/CodeGen/X86/lea-opt-cse4.ll b/llvm/test/CodeGen/X86/lea-opt-cse4.ll index 8fe4516fe959e5..d63fdcbca79ed8 100644 --- a/llvm/test/CodeGen/X86/lea-opt-cse4.ll +++ b/llvm/test/CodeGen/X86/lea-opt-cse4.ll @@ -71,13 +71,13 @@ define void @foo_loop(ptr nocapture %ctx, i32 %n) local_unnamed_addr #0 { ; X64-NEXT: # %bb.2: # %exit ; X64-NEXT: addl %eax, %ecx ; X64-NEXT: leal 1(%rax,%rcx), %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: movl %ecx, 16(%rdi) +; X64-NEXT: leal (%rax,%rax), %edx +; X64-NEXT: addl %eax, %edx +; X64-NEXT: addl %eax, %edx +; X64-NEXT: addl %eax, %edx +; X64-NEXT: addl %eax, %edx +; X64-NEXT: addl %ecx, %edx +; X64-NEXT: movl %edx, 16(%rdi) ; X64-NEXT: retq ; ; X86-LABEL: foo_loop: @@ -102,13 +102,13 @@ define void @foo_loop(ptr nocapture %ctx, i32 %n) local_unnamed_addr #0 { ; X86-NEXT: # %bb.2: # %exit ; X86-NEXT: addl %ecx, %esi ; X86-NEXT: leal 1(%ecx,%esi), %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: movl %edx, 16(%eax) +; X86-NEXT: leal (%ecx,%ecx), %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: movl %esi, 16(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/lea-opt2.ll b/llvm/test/CodeGen/X86/lea-opt2.ll index cec19dcf49c8df..b076188c4e9182 100644 --- a/llvm/test/CodeGen/X86/lea-opt2.ll +++ b/llvm/test/CodeGen/X86/lea-opt2.ll @@ -35,13 +35,11 @@ entry: define i32 @test2(ptr %p, i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: addl %eax, %ecx ; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: addl %esi, %ecx ; CHECK-NEXT: movl %ecx, (%rdi) ; CHECK-NEXT: subl %edx, %eax -; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq entry: %0 = add i32 %a, %b @@ -55,13 +53,11 @@ entry: define i32 @test3(ptr %p, i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: addl %eax, %ecx ; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: addl %esi, %ecx ; CHECK-NEXT: movl %ecx, (%rdi) ; CHECK-NEXT: subl %edx, %eax -; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq entry: %0 = add i32 %a, %b @@ -114,8 +110,8 @@ define i64 @test6(ptr %p, i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: addq %rdx, %rcx ; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: addq %rdx, %rcx ; CHECK-NEXT: movq %rcx, (%rdi) ; CHECK-NEXT: subq %rdx, %rax ; CHECK-NEXT: retq @@ -132,8 +128,8 @@ define i64 @test7(ptr %p, i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: addq %rdx, %rcx ; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: addq %rdx, %rcx ; CHECK-NEXT: movq %rcx, (%rdi) ; CHECK-NEXT: subq %rdx, %rax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/lrshrink.ll b/llvm/test/CodeGen/X86/lrshrink.ll index 51f675d2451906..b72a5a413c5730 100644 --- a/llvm/test/CodeGen/X86/lrshrink.ll +++ b/llvm/test/CodeGen/X86/lrshrink.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s ; Checks if "%7 = add nuw nsw i64 %4, %2" is moved before the last call ; to minimize live-range. @@ -33,10 +33,10 @@ define i64 @test(i1 %a, i64 %r1, i64 %r2, i64 %s1, i64 %s2, i64 %t1, i64 %t2) { ; CHECK-NEXT: addq %r14, %r15 ; CHECK-NEXT: callq _Z3foov@PLT ; CHECK-NEXT: movl %eax, %r14d -; CHECK-NEXT: addq %r15, %r14 ; CHECK-NEXT: callq _Z3foov@PLT ; CHECK-NEXT: movl %eax, %eax ; CHECK-NEXT: addq %r14, %rax +; CHECK-NEXT: addq %r15, %rax ; CHECK-NEXT: addq %rbx, %rax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 24 diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll index 601166d67f6f27..11c216e67bc825 100644 --- a/llvm/test/CodeGen/X86/midpoint-int.ll +++ b/llvm/test/CodeGen/X86/midpoint-int.ll @@ -304,40 +304,41 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: cmpl %ebp, %eax +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: setl %cl +; X86-NEXT: movzbl %cl, %edx ; X86-NEXT: jl .LBB5_1 ; X86-NEXT: # %bb.2: +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: movl %ebp, %esi ; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: jmp .LBB5_3 ; X86-NEXT: .LBB5_1: -; X86-NEXT: movl %edi, %edx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: .LBB5_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: negl %edx +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %esi, %eax -; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: sbbl %ebx, %edi ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi +; X86-NEXT: imull %eax, %edx ; X86-NEXT: imull %ebp, %edi +; X86-NEXT: addl %edx, %edi +; X86-NEXT: mull %ebp ; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -376,42 +377,43 @@ define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: setb %dl -; X86-NEXT: sbbl %ebx, %ebx -; X86-NEXT: testb %dl, %dl +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ebx, %eax +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: setb %cl +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: testb %cl, %cl ; X86-NEXT: jne .LBB6_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: movl %ebx, %ecx ; X86-NEXT: jmp .LBB6_3 ; X86-NEXT: .LBB6_1: -; X86-NEXT: movl %edi, %edx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edi, %ebp +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: .LBB6_3: -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %esi, %eax -; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: orl $1, %ebx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: sbbl %ebp, %edi ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi +; X86-NEXT: imull %eax, %edx +; X86-NEXT: imull %ebx, %edi +; X86-NEXT: addl %edx, %edi +; X86-NEXT: mull %ebx ; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -456,39 +458,39 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind { ; X86-NEXT: pushl %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %esi -; X86-NEXT: movl 4(%ecx), %ecx -; X86-NEXT: cmpl %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: movl 4(%edx), %esi +; X86-NEXT: cmpl %ecx, %eax ; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ecx, %edx +; X86-NEXT: sbbl %esi, %edx ; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: movzbl %dl, %edx ; X86-NEXT: jl .LBB7_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: movl %esi, %edx +; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: movl %ecx, %ebx ; X86-NEXT: jmp .LBB7_3 ; X86-NEXT: .LBB7_1: ; X86-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-NEXT: movl %eax, %edx -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: .LBB7_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: negl %edx +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %edx, %eax +; X86-NEXT: subl %ebx, %eax ; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi +; X86-NEXT: imull %eax, %edx ; X86-NEXT: imull %ebp, %edi +; X86-NEXT: addl %edx, %edi +; X86-NEXT: mull %ebp ; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl %esi, %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: adcl %esi, %edx ; X86-NEXT: addl $4, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -531,41 +533,42 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl (%edx), %eax -; X86-NEXT: movl 4(%edx), %edi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %eax +; X86-NEXT: movl 4(%ecx), %edi +; X86-NEXT: cmpl %ebp, %eax +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: setl %cl +; X86-NEXT: movzbl %cl, %edx ; X86-NEXT: jl .LBB8_1 ; X86-NEXT: # %bb.2: +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: movl %ebp, %esi ; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: jmp .LBB8_3 ; X86-NEXT: .LBB8_1: -; X86-NEXT: movl %edi, %edx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: .LBB8_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: negl %edx +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %esi, %eax -; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: sbbl %ebx, %edi ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi +; X86-NEXT: imull %eax, %edx ; X86-NEXT: imull %ebp, %edi +; X86-NEXT: addl %edx, %edi +; X86-NEXT: mull %ebp ; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -610,40 +613,40 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X86-NEXT: pushl %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %esi -; X86-NEXT: movl 4(%eax), %ecx +; X86-NEXT: movl (%eax), %ecx +; X86-NEXT: movl 4(%eax), %esi ; X86-NEXT: movl (%edx), %eax ; X86-NEXT: movl 4(%edx), %edi -; X86-NEXT: cmpl %esi, %eax +; X86-NEXT: cmpl %ecx, %eax ; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ecx, %edx +; X86-NEXT: sbbl %esi, %edx ; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: movzbl %dl, %edx ; X86-NEXT: jl .LBB9_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: movl %esi, %edx +; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: movl %ecx, %ebx ; X86-NEXT: jmp .LBB9_3 ; X86-NEXT: .LBB9_1: ; X86-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-NEXT: movl %eax, %edx -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: .LBB9_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: negl %edx +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %edx, %eax +; X86-NEXT: subl %ebx, %eax ; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload ; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx ; X86-NEXT: shrl %edi +; X86-NEXT: imull %eax, %edx ; X86-NEXT: imull %ebp, %edi +; X86-NEXT: addl %edx, %edi +; X86-NEXT: mull %ebp ; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl %esi, %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: adcl %esi, %edx ; X86-NEXT: addl $4, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/misched-balance.ll b/llvm/test/CodeGen/X86/misched-balance.ll index aef363f11bbe2a..02d14253601595 100644 --- a/llvm/test/CodeGen/X86/misched-balance.ll +++ b/llvm/test/CodeGen/X86/misched-balance.ll @@ -9,8 +9,8 @@ define void @unrolled_mmult1(ptr %tmp55, ptr %tmp56, ptr %pre, ptr %pre94, entry: br label %for.body -; imull folded loads should be in order and interleaved with addl, never -; adjacent. Also check that we have no spilling. +; imull folded loads should be in order, addl may be reordered to reduce total +; latency. Also check that we have no spilling. ; ; Since mmult1 IR is already in good order, this effectively ensure ; the scheduler maintains source order. @@ -22,28 +22,28 @@ entry: ; CHECK: addl ; CHECK: imull 8 ; CHECK-NOT: {{imull|rsp}} -; CHECK: addl ; CHECK: imull 12 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl +; CHECK: addl ; CHECK: imull 16 ; CHECK-NOT: {{imull|rsp}} -; CHECK: addl ; CHECK: imull 20 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl ; CHECK: imull 24 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl +; CHECK: addl ; CHECK: imull 28 ; CHECK-NOT: {{imull|rsp}} -; CHECK: addl ; CHECK: imull 32 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl ; CHECK: imull 36 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl +; CHECK: addl ; CHECK-NOT: {{imull|rsp}} ; CHECK-LABEL: %end for.body: @@ -127,28 +127,28 @@ end: ; CHECK: addl ; CHECK: imull 8 ; CHECK-NOT: {{imull|rsp}} -; CHECK: addl ; CHECK: imull 12 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl +; CHECK: addl ; CHECK: imull 16 ; CHECK-NOT: {{imull|rsp}} -; CHECK: addl ; CHECK: imull 20 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl ; CHECK: imull 24 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl +; CHECK: addl ; CHECK: imull 28 ; CHECK-NOT: {{imull|rsp}} -; CHECK: addl ; CHECK: imull 32 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl ; CHECK: imull 36 ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl +; CHECK: addl ; CHECK-NOT: {{imull|rsp}} ; CHECK-LABEL: %end define void @unrolled_mmult2(ptr %tmp55, ptr %tmp56, ptr %pre, ptr %pre94, diff --git a/llvm/test/CodeGen/X86/misched-matrix.ll b/llvm/test/CodeGen/X86/misched-matrix.ll index fa56d6bf226d13..178bea92867ee7 100644 --- a/llvm/test/CodeGen/X86/misched-matrix.ll +++ b/llvm/test/CodeGen/X86/misched-matrix.ll @@ -33,25 +33,25 @@ ; ILPMIN: imull ; ILPMIN: addl ; ILPMIN: imull -; ILPMIN: addl ; ILPMIN: imull ; ILPMIN: addl +; ILPMIN: addl ; ILPMIN: movl %{{.*}}, 4( ; ILPMIN: imull ; ILPMIN: imull ; ILPMIN: addl ; ILPMIN: imull -; ILPMIN: addl ; ILPMIN: imull ; ILPMIN: addl +; ILPMIN: addl ; ILPMIN: movl %{{.*}}, 8( ; ILPMIN: imull ; ILPMIN: imull ; ILPMIN: addl ; ILPMIN: imull -; ILPMIN: addl ; ILPMIN: imull ; ILPMIN: addl +; ILPMIN: addl ; ILPMIN: movl %{{.*}}, 12( ; ILPMIN-LABEL: %for.end ; diff --git a/llvm/test/CodeGen/X86/mul-constant-i16.ll b/llvm/test/CodeGen/X86/mul-constant-i16.ll index 267ebca88e31e1..f04b89f54e3a30 100644 --- a/llvm/test/CodeGen/X86/mul-constant-i16.ll +++ b/llvm/test/CodeGen/X86/mul-constant-i16.ll @@ -558,10 +558,10 @@ define i16 @test_mul_by_28(i16 %x) { define i16 @test_mul_by_29(i16 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,8), %eax -; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/mul-constant-i32.ll b/llvm/test/CodeGen/X86/mul-constant-i32.ll index b6791a84ffa459..44c140857f4c22 100644 --- a/llvm/test/CodeGen/X86/mul-constant-i32.ll +++ b/llvm/test/CodeGen/X86/mul-constant-i32.ll @@ -900,10 +900,10 @@ define i32 @test_mul_by_28(i32 %x) { define i32 @test_mul_by_29(i32 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,8), %eax -; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; @@ -1179,6 +1179,7 @@ define i32 @test_mul_by_66(i32 %x) { ; ; X64-SLM-LABEL: test_mul_by_66: ; X64-SLM: # %bb.0: +; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi ; X64-SLM-NEXT: movl %edi, %eax ; X64-SLM-NEXT: shll $6, %eax ; X64-SLM-NEXT: addl %edi, %eax diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll index 94e09011fda6cb..c65858586f0db3 100644 --- a/llvm/test/CodeGen/X86/mul-constant-i64.ll +++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll @@ -498,13 +498,18 @@ define i64 @test_mul_by_16(i64 %x) { define i64 @test_mul_by_17(i64 %x) { ; X86-LABEL: test_mul_by_17: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shll $4, %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: shll $4, %esi ; X86-NEXT: movl $17, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %esi, %edx ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_17: @@ -685,13 +690,18 @@ define i64 @test_mul_by_21(i64 %x) { define i64 @test_mul_by_22(i64 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,4), %ecx -; X86-NEXT: leal (%eax,%ecx,4), %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%ecx,%eax,4), %esi ; X86-NEXT: movl $22, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %esi, %edx ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_22: @@ -844,13 +854,18 @@ define i64 @test_mul_by_25(i64 %x) { define i64 @test_mul_by_26(i64 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,4), %ecx -; X86-NEXT: leal (%ecx,%ecx,4), %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%eax,%eax,4), %esi ; X86-NEXT: movl $26, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_26: @@ -924,13 +939,18 @@ define i64 @test_mul_by_27(i64 %x) { define i64 @test_mul_by_28(i64 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,8), %ecx -; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %esi ; X86-NEXT: movl $28, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_28: @@ -971,14 +991,19 @@ define i64 @test_mul_by_28(i64 %x) { define i64 @test_mul_by_29(i64 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,8), %ecx -; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: addl %eax, %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %esi +; X86-NEXT: addl %ecx, %ecx ; X86-NEXT: movl $29, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_29: @@ -1501,8 +1526,8 @@ define i64 @test_mul_spec(i64 %x) nounwind { ; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %edi ; X86-NEXT: imull %esi, %ebx -; X86-NEXT: addl %ebx, %edx ; X86-NEXT: imull %ecx, %edi +; X86-NEXT: addl %ebx, %edi ; X86-NEXT: addl %edi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -1537,8 +1562,8 @@ define i64 @test_mul_spec(i64 %x) nounwind { ; X86-NOOPT-NEXT: movl %esi, %eax ; X86-NOOPT-NEXT: mull %edi ; X86-NOOPT-NEXT: imull %esi, %ebx -; X86-NOOPT-NEXT: addl %ebx, %edx ; X86-NOOPT-NEXT: imull %ecx, %edi +; X86-NOOPT-NEXT: addl %ebx, %edi ; X86-NOOPT-NEXT: addl %edi, %edx ; X86-NOOPT-NEXT: popl %esi ; X86-NOOPT-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/mul-i1024.ll b/llvm/test/CodeGen/X86/mul-i1024.ll index 015137b3d350c6..fc40f50ab4e5a8 100644 --- a/llvm/test/CodeGen/X86/mul-i1024.ll +++ b/llvm/test/CodeGen/X86/mul-i1024.ll @@ -10,112 +10,111 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: subl $400, %esp # imm = 0x190 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 60(%ecx), %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl 56(%ecx), %edi +; X32-NEXT: movl 60(%ecx), %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl (%eax), %ebp -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: movl 56(%ecx), %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl (%ebp), %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 4(%eax), %ecx -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl 4(%ebp), %ecx +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %ecx, %edi +; X32-NEXT: movl %ecx, %ebp ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl 48(%edi), %esi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl 48(%esi), %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 52(%edi), %eax +; X32-NEXT: movl 52(%esi), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ebp -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebp, %ecx -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 8(%eax), %ebp -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 8(%eax), %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 12(%eax), %edi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: adcl %ebp, %ebx -; X32-NEXT: setb (%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edi, %ebp +; X32-NEXT: movl 12(%eax), %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill @@ -123,166 +122,165 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %esi, %ebp -; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl 40(%esi), %edi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 44(%esi), %ebp +; X32-NEXT: movl 40(%esi), %ebp ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl 44(%esi), %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: movl 32(%ebp), %edi -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 32(%edi), %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 36(%ebp), %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %esi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl 36(%edi), %edi ; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx +; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl %esi, %ebx -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %ebp, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebx, %edi +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: movl %edi, %esi ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %eax @@ -290,7 +288,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -300,67 +298,66 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 16(%eax), %edi -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl 16(%eax), %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ebp, %ebx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 20(%eax), %edx -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: mull %edx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 20(%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebp -; X32-NEXT: setb %bl -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %esi, %edi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: setb %cl +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebx, %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebp, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -368,77 +365,78 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 24(%eax), %ebx ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 28(%eax), %ecx -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %esi, %ebp -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %eax, %esi +; X32-NEXT: adcl %ebp, %edi +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebp, %esi -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %bl +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %bl, %edi -; X32-NEXT: adcl %edi, %edx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl %esi, %edi -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %edi +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -446,72 +444,71 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl %edi, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %esi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: setb (%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -519,73 +516,74 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: adcl %ebx, %ebp +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %ebp, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edi, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %ecx +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload +; X32-NEXT: adcl %edi, %eax +; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -595,116 +593,115 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl 24(%esi), %ebp -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl 24(%esi), %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 28(%esi), %edi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebx +; X32-NEXT: movl 28(%esi), %ebp +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl 16(%edi), %esi -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl 16(%esi), %edi +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 20(%edi), %eax +; X32-NEXT: movl 20(%esi), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ebp -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebp, %ecx -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl %esi, %ebx -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, %ebx +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %ebp, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebx, %edi +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax @@ -715,89 +712,89 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl 8(%esi), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 8(%esi), %ebp +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NEXT: movl 12(%esi), %ebp -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl 12(%esi), %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: movl (%ebp), %edi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl (%esi), %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 4(%ebp), %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %esi -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl 4(%esi), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx +; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -805,44 +802,43 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl %esi, %ebx -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: adcl %ebp, %ebx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebx, %edi +; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx @@ -850,33 +846,33 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %eax +; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload +; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: adcl $0, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -884,130 +880,130 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ebp, %ebx -; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebp -; X32-NEXT: setb %bl +; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %esi +; X32-NEXT: mull %ebx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %esi -; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: adcl %ebp, %edi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebp, %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %esi +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %bl +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %bl, %edi -; X32-NEXT: adcl %edi, %edx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl %esi, %edi -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %edi +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: adcl $0, %edx @@ -1022,42 +1018,41 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl %edi, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %esi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %bl, %eax @@ -1067,96 +1062,98 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %edi, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, %ebp ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %esi -; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: adcl %ebx, %ebp -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: addl %esi, %edi +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ebx, %esi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: addl %edi, %ebx +; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax @@ -1173,7 +1170,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx @@ -1187,9 +1184,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload @@ -1204,53 +1201,50 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl 32(%ebx), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: movl 32(%ebp), %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 36(%eax), %ecx -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 36(%ebp), %ebp +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx @@ -1258,7 +1252,8 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %ebx, %ecx @@ -1270,23 +1265,24 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 40(%eax), %ebx -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: movl 40(%eax), %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, %ebx +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 44(%eax), %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: movl 44(%eax), %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi @@ -1294,16 +1290,17 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %esi, %ebp +; X32-NEXT: movl %ebx, %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebp +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill @@ -1320,20 +1317,20 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %ebp, %ecx +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -1341,61 +1338,60 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: setb %bl +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %ebp ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -1404,76 +1400,75 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %esi, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: adcl %ebp, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %esi +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %edi +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: adcl $0, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -1483,33 +1478,34 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 48(%eax), %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl 48(%eax), %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 52(%eax), %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, %edi ; X32-NEXT: mull %edx ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %bl ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %esi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %bl, %eax @@ -1517,29 +1513,29 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebp, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %esi -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -1554,34 +1550,34 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 60(%eax), %ecx ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %ebx, %ebp +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill @@ -1593,25 +1589,25 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl %ebx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl %edi, %ebx ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax @@ -1625,7 +1621,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl %edi, %edx +; X32-NEXT: movl %ebx, %edx ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %ebp, %ecx ; X32-NEXT: adcl $0, %ecx @@ -1668,27 +1664,27 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx @@ -1708,30 +1704,30 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %ebx, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill @@ -1743,30 +1739,30 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: setb %bl +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: addl %edi, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl %ebx, %edi ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: movl %ecx, %edi -; X32-NEXT: adcl %eax, %edi +; X32-NEXT: movl %ecx, %ebx +; X32-NEXT: adcl %eax, %ebx ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -1779,10 +1775,11 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: adcl $0, %ebp -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl %eax, %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -1793,7 +1790,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl (%esp), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -1806,6 +1803,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -1849,55 +1847,54 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ebx, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ecx, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -1905,24 +1902,24 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: adcl %ebp, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %esi, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -1930,7 +1927,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -1950,7 +1947,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -1960,268 +1957,267 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebp, %esi -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %ebx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ebp, %esi +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %esi, %edi -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %ebx, %ebp -; X32-NEXT: setb %bl +; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebp, %esi -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %esi +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %edi +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movl %ebp, %esi -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: adcl $0, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %edi +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ecx, %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %eax, %esi ; X32-NEXT: adcl %ebp, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ebx, %ebp +; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebx, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ebp, %ecx +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %edx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl %edi, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl %ebp, %edi -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: adcl $0, %edx @@ -2234,62 +2230,63 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl %edi, %edx +; X32-NEXT: movl %ebx, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edi, %ecx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: mull %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebp -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi @@ -2299,82 +2296,83 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: setb %bl ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %esi, %edi ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %edi, %ebx +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ebx, %ebp +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: addl %ebx, %ebp +; X32-NEXT: mull %ebx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: adcl %eax, %ebp ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -2386,14 +2384,14 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ebp -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl %eax, %ebx ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -2420,143 +2418,141 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 64(%eax), %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 68(%eax), %ecx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %ecx, %esi -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl 68(%eax), %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebp, %esi -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ebp -; X32-NEXT: setb %bl -; X32-NEXT: movl %ecx, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebp, %ecx -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: setb %cl +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi +; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 72(%eax), %edi -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl 72(%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 76(%eax), %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: movl 76(%eax), %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: adcl %ebp, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %esi, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebx, %esi +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: adcl %ebp, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: mull %edi +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -2566,97 +2562,96 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ebx, %ebp ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebp -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %ebp +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movzbl %bl, %eax -; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %esi, %edi -; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %ebp, %ebx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill @@ -2664,42 +2659,44 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl %bl, %ebx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %eax -; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload +; X32-NEXT: adcl %ebx, %eax +; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -2707,64 +2704,63 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 80(%eax), %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 84(%eax), %ecx -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ecx, %ebp -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %ebx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: adcl %ebp, %edi +; X32-NEXT: setb %bl +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %esi -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -2772,38 +2768,39 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 88(%eax), %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 92(%eax), %edi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %eax, %esi +; X32-NEXT: adcl %ebx, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movl %edi, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill @@ -2816,26 +2813,26 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %esi, %edi +; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl %ebp, %edi -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl %edi, %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: adcl $0, %edx @@ -2848,15 +2845,16 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl %edi, %edx +; X32-NEXT: movl %ebx, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ebp, %ecx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %esi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -2866,130 +2864,130 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: addl %ecx, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %esi, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %ebx, %ebp -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ebx, %ebp ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: adcl $0, %edx +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %edi, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movl %edx, %edi +; X32-NEXT: adcl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ecx, %edx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %ebx +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -2999,14 +2997,14 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -3041,7 +3039,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx @@ -3088,7 +3086,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, %esi ; X32-NEXT: adcl %ebp, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl (%esp), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %edi @@ -3137,56 +3135,52 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: imull %eax, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: addl %edx, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %edx +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: imull %edi, %esi -; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %esi, %ebp +; X32-NEXT: addl %edx, %ebp +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %edx, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl %ebx, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebp -; X32-NEXT: movl %ebp, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebp -; X32-NEXT: setb %cl +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %ecx ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -3196,50 +3190,48 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: imull %ebx, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: addl %edx, %ebp ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: imull %edi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %edi, %ecx -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebp, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %ecx, %esi +; X32-NEXT: addl %edx, %esi +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edx, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl %ebp, %ebx ; X32-NEXT: setb %cl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull (%esp) # 4-byte Folded Reload ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -3248,79 +3240,79 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, (%esp) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl 104(%esi), %ebx -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl 104(%ecx), %ebp +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 108(%esi), %esi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl 108(%ecx), %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movl %ebx, %edi +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl 96(%esi), %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl 96(%ecx), %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 100(%esi), %eax +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 100(%ecx), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %edi +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: adcl %ebp, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx +; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -3331,32 +3323,31 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %ebx, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %esi, %ebx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebp, %esi +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, %ebx +; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %esi -; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -3367,21 +3358,21 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %edi, %ebx +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %ebx, %ebp ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax @@ -3389,101 +3380,98 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 112(%ecx), %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: imull %edi, %esi -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl 112(%ecx), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: imull %eax, %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl 116(%ecx), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: imull %eax, %ebx +; X32-NEXT: addl %edi, %ebx ; X32-NEXT: addl %edx, %ebx ; X32-NEXT: movl 120(%ecx), %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %ecx, %esi +; X32-NEXT: movl %ecx, %edx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %ecx +; X32-NEXT: movl 124(%edx), %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: mull %ebp -; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl 124(%esi), %esi -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: addl %edx, %esi +; X32-NEXT: addl %edx, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ebp, %ebx -; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ecx, %ebp +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %ecx ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %edx +; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %esi, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: addl %edx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %edi +; X32-NEXT: imull %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl %edx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %eax, %ecx +; X32-NEXT: imull %edi, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebp, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: mull %edi +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -3492,9 +3480,10 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movzbl %bl, %edi ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -3516,7 +3505,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload @@ -3553,7 +3542,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax @@ -3644,7 +3633,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi @@ -3664,7 +3653,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %ebx, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -3689,7 +3678,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax @@ -3719,7 +3708,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ebp, %eax @@ -3814,7 +3803,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -3959,7 +3948,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -3994,7 +3983,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -4017,7 +4006,6 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %ebp ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx @@ -4027,18 +4015,18 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %esi, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -4060,9 +4048,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -4093,7 +4081,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -4120,7 +4108,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi @@ -4152,7 +4140,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi @@ -4173,14 +4161,14 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %esi, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 104(%eax), %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi @@ -4202,7 +4190,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: adcl %ebp, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %esi @@ -4219,7 +4207,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl (%esp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -4251,56 +4239,53 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %edi, %esi -; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %eax, %edi +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: imull %edi, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %edx +; X32-NEXT: imull %edi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ecx, %esi +; X32-NEXT: addl %edx, %esi ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edx, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: movl %ebx, %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -4309,52 +4294,51 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx ; X32-NEXT: movl 120(%edi), %esi -; X32-NEXT: movl %edi, %ebx ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull (%esp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl 112(%edi), %edi -; X32-NEXT: movl 116(%ebx), %ebp +; X32-NEXT: movl 112(%edi), %ecx +; X32-NEXT: movl 116(%edi), %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: imull %ebp, %ebx -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %edi, %ecx -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %edx +; X32-NEXT: imull %edi, %edx +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: imull %ecx, %ebx +; X32-NEXT: addl %edx, %ebx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %edx, %ebx +; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %esi, %ebp -; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: addl %esi, %edi ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -4364,7 +4348,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, (%esp) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -4393,7 +4377,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -4427,7 +4411,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax @@ -4461,9 +4445,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload @@ -4492,101 +4476,98 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl %ecx, %ebx ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %edi, %esi -; X32-NEXT: imull %eax, %esi +; X32-NEXT: imull %eax, %edi +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: addl %esi, %edx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: imull %ebp, %ecx +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %edi +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %edx +; X32-NEXT: imull %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: imull %esi, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ecx, %edi ; X32-NEXT: addl %edx, %edi -; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edx, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebp -; X32-NEXT: movl %ebp, %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %cl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %esi, %ecx -; X32-NEXT: movl %esi, %eax +; X32-NEXT: imull %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: movl %eax, %edi ; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: addl %edx, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: imull %edi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: imull %ebp, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: mull %ebp +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, %ebx -; X32-NEXT: mull %esi +; X32-NEXT: movl %ebp, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi @@ -4602,7 +4583,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload @@ -4624,9 +4605,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -4637,9 +4618,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -4677,9 +4658,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -4717,7 +4698,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload @@ -4804,201 +4785,200 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X64-NEXT: pushq %rbx ; X64-NEXT: subq $240, %rsp ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rsi, %rbp ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq 40(%rdi), %r12 -; X64-NEXT: movq 32(%rdi), %r14 -; X64-NEXT: movq 56(%rdi), %r15 +; X64-NEXT: movq 40(%rdi), %rbx +; X64-NEXT: movq 32(%rdi), %r12 +; X64-NEXT: movq 56(%rdi), %r14 ; X64-NEXT: movq 48(%rdi), %r10 ; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq (%rsi), %r11 -; X64-NEXT: movq 8(%rsi), %r8 +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: movq %rsi, %r13 ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq %r15, %rax -; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %rcx, %r9 -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: addq %rsi, %r9 +; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %r9, %rcx -; X64-NEXT: adcq %rsi, %r10 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %r9, %r8 +; X64-NEXT: adcq %rdi, %r10 ; X64-NEXT: setb %al ; X64-NEXT: movzbl %al, %r9d -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r13 -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: addq %r10, %rsi -; X64-NEXT: adcq %r9, %r13 ; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %r10, %rdi +; X64-NEXT: adcq %r9, %r15 +; X64-NEXT: movq %r12, %rax ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r12, %rax +; X64-NEXT: movq %rbx, %r14 +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r11 ; X64-NEXT: addq %r9, %r11 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: addq %r11, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r10, %r9 ; X64-NEXT: setb %r10b -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r9, %rbx ; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %r11 -; X64-NEXT: addq %rdi, %rbx -; X64-NEXT: adcq %rcx, %r11 -; X64-NEXT: adcq $0, %rsi -; X64-NEXT: adcq $0, %r13 -; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq 16(%rbp), %r15 -; X64-NEXT: movq %r14, %r10 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: movq %r12, %rax -; X64-NEXT: movq %r12, %rcx +; X64-NEXT: addq %rbp, %rbx +; X64-NEXT: adcq %r8, %r11 +; X64-NEXT: adcq $0, %rdi +; X64-NEXT: adcq $0, %r15 +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq 16(%r13), %rcx ; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq %r14, %rbp +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %rdi, %r14 +; X64-NEXT: addq %r8, %r14 ; X64-NEXT: adcq $0, %r9 -; X64-NEXT: movq 24(%rbp), %rdi -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %rbp +; X64-NEXT: movq 24(%r13), %r13 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: addq %r14, %rax ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: adcq %r9, %r12 ; X64-NEXT: setb %r10b -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %r12, %r9 ; X64-NEXT: movzbl %r10b, %eax -; X64-NEXT: adcq %rax, %rdi -; X64-NEXT: addq %rbx, %r8 -; X64-NEXT: movq %r8, (%rsp) # 8-byte Spill +; X64-NEXT: adcq %rax, %r8 +; X64-NEXT: addq %rbx, %rsi +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r11, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r14, (%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %r9 -; X64-NEXT: adcq $0, %rdi -; X64-NEXT: addq %rsi, %r9 -; X64-NEXT: adcq %r13, %rdi +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: addq %rdi, %r9 +; X64-NEXT: adcq %r15, %r8 ; X64-NEXT: setb %r10b ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rcx, %r11 -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: addq %rsi, %r11 +; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rbp -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: addq %r11, %rax ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %sil +; X64-NEXT: adcq %rdi, %rsi +; X64-NEXT: setb %dil ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rbp -; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %sil, %ecx -; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: mulq %r13 +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: movzbl %dil, %esi +; X64-NEXT: adcq %rsi, %rdx ; X64-NEXT: addq %r9, %r14 ; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rdi, %r11 +; X64-NEXT: adcq %r8, %r11 ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movzbl %r10b, %ecx -; X64-NEXT: adcq %rcx, %rax +; X64-NEXT: movzbl %r10b, %esi +; X64-NEXT: adcq %rsi, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: movq 16(%r8), %rsi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq 16(%rcx), %r10 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq 24(%r8), %r14 +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq 24(%rcx), %r14 ; X64-NEXT: movq %r14, %rax ; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r13 -; X64-NEXT: movq %r13, %r15 +; X64-NEXT: movq %r13, %rbx ; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rcx, %r11 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rsi, %r8 ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %r10, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; X64-NEXT: mulq %r13 -; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: addq %r11, %rsi -; X64-NEXT: adcq %rdi, %rbx +; X64-NEXT: addq %r8, %rsi +; X64-NEXT: adcq %rdi, %r11 ; X64-NEXT: setb %r10b ; X64-NEXT: movq %r14, %rax ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %r13, %r12 -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %rbx, %rdi +; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %r11, %r8 ; X64-NEXT: movzbl %r10b, %eax -; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: movq (%r8), %r13 +; X64-NEXT: adcq %rax, %rbp +; X64-NEXT: movq (%rcx), %r13 ; X64-NEXT: movq %r13, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq 8(%r8), %rbp -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq 8(%rcx), %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %r11, %r14 ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq %r12, %r11 +; X64-NEXT: movq %r12, %rcx ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: addq %r14, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbx, %r12 ; X64-NEXT: setb %r10b -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r12, %rbx @@ -5006,31 +4986,32 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X64-NEXT: adcq %rax, %r11 ; X64-NEXT: addq %r9, %rbx ; X64-NEXT: adcq %rsi, %r11 -; X64-NEXT: adcq $0, %rdi -; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %r13, %r10 +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: adcq $0, %rbp +; X64-NEXT: movq %r13, %rcx ; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: mulq %r8 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rsi, %r14 ; X64-NEXT: adcq $0, %r9 -; X64-NEXT: movq %r10, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: addq %r14, %rax ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: adcq %r9, %r12 ; X64-NEXT: setb %r10b -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %r12, %rsi @@ -5042,202 +5023,199 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: adcq $0, %r9 -; X64-NEXT: addq %rdi, %rsi -; X64-NEXT: adcq %rcx, %r9 +; X64-NEXT: addq %r8, %rsi +; X64-NEXT: adcq %rbp, %r9 ; X64-NEXT: setb %r10b -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r13 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rcx, %rbx -; X64-NEXT: adcq $0, %r11 -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r8 ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: addq %rdi, %r11 +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rcx, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: mulq %rbp +; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %rbx, %rcx -; X64-NEXT: adcq %r11, %r14 +; X64-NEXT: addq %r11, %rcx +; X64-NEXT: adcq %r8, %rbx ; X64-NEXT: setb %r11b ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: addq %r14, %rax +; X64-NEXT: mulq %rbp +; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %r11b, %edi ; X64-NEXT: adcq %rdi, %rdx -; X64-NEXT: addq %rsi, %r13 +; X64-NEXT: addq %rsi, %r14 ; X64-NEXT: adcq %r9, %rcx ; X64-NEXT: movzbl %r10b, %esi ; X64-NEXT: adcq %rsi, %rax ; X64-NEXT: adcq $0, %rdx -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq (%rsp), %rax # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: adcq (%rsp), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq 32(%rcx), %rdi -; X64-NEXT: movq %r8, %r10 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: movq 32(%r10), %rcx +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r8 +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r12, %r8 ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, %r14 ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r11 ; X64-NEXT: addq %rsi, %r11 ; X64-NEXT: adcq $0, %r9 -; X64-NEXT: movq 40(%rcx), %rsi -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %rdi +; X64-NEXT: movq 40(%r10), %rcx +; X64-NEXT: movq %r10, %rdi +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, %r12 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %r11, %rsi ; X64-NEXT: adcq %r9, %rbx ; X64-NEXT: setb %r10b -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r11 ; X64-NEXT: addq %rbx, %r11 ; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %r9 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; X64-NEXT: movq %r12, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: addq %rbx, %r13 ; X64-NEXT: adcq $0, %r14 -; X64-NEXT: movq %r12, %rax -; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rdx, %r15 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill -; X64-NEXT: adcq %r14, %r10 -; X64-NEXT: setb %r15b -; X64-NEXT: movq %rbp, %rax -; X64-NEXT: movq %rbp, %rdi -; X64-NEXT: mulq %rbx +; X64-NEXT: adcq %r14, %r15 +; X64-NEXT: setb %r10b +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %r10, %r14 -; X64-NEXT: movzbl %r15b, %eax +; X64-NEXT: addq %r15, %r14 +; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %rbx -; X64-NEXT: addq %r8, %r14 +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: adcq %rsi, %rbx ; X64-NEXT: adcq $0, %r11 ; X64-NEXT: adcq $0, %r9 -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: movq 48(%rcx), %rcx -; X64-NEXT: movq %r12, %r15 -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rcx +; X64-NEXT: movq 48(%rdi), %r12 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r12 -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rdx, %r15 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: addq %rsi, %r13 -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq 56(%r8), %rsi -; X64-NEXT: movq %r15, %rax +; X64-NEXT: adcq $0, %r15 +; X64-NEXT: movq 56(%rdi), %rsi +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %r13, %rdi -; X64-NEXT: adcq %r10, %r15 -; X64-NEXT: setb %r8b -; X64-NEXT: movq %r12, %rax +; X64-NEXT: adcq %r15, %rcx +; X64-NEXT: setb %r10b +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rsi, %r8 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %r15, %r13 -; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: addq %rcx, %r13 +; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: addq %r14, %rbp -; X64-NEXT: movq %rbp, %r8 +; X64-NEXT: movq %rbp, %r10 ; X64-NEXT: adcq %rbx, %rdi ; X64-NEXT: adcq $0, %r13 ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: addq %r11, %r13 ; X64-NEXT: adcq %r9, %rsi -; X64-NEXT: setb %bpl +; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload ; X64-NEXT: movq %r14, %rax -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %r12 +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload ; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r9, %rbx -; X64-NEXT: adcq $0, %r10 +; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: adcq $0, %r9 ; X64-NEXT: movq %r14, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %rbx, %r9 -; X64-NEXT: adcq %r10, %r15 -; X64-NEXT: setb %r10b +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: adcq %r9, %rcx +; X64-NEXT: setb %r15b ; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %rcx +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r15, %rbx -; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: movzbl %r15b, %eax ; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: addq %r13, %r12 -; X64-NEXT: adcq %rsi, %r9 -; X64-NEXT: movzbl %bpl, %eax +; X64-NEXT: addq %r13, %rbp +; X64-NEXT: adcq %rsi, %r12 +; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; X64-NEXT: adcq %rax, %rbx ; X64-NEXT: adcq $0, %r14 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: adcq %rax, (%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq $0, %rbp ; X64-NEXT: adcq $0, %r12 -; X64-NEXT: adcq $0, %r9 ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: adcq $0, %r14 -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload ; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill @@ -5246,127 +5224,127 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %rdi +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rsi, %r13 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %rcx, %r10 +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: addq %r10, %r12 -; X64-NEXT: adcq %rsi, %r15 -; X64-NEXT: setb %r8b +; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: adcq %rsi, %rcx +; X64-NEXT: setb %r10b ; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rcx +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: addq %r15, %rsi -; X64-NEXT: movzbl %r8b, %eax -; X64-NEXT: adcq %rax, %r9 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %rcx, %rdi +; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: adcq %rax, %r9 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %r13 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r15 ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %r10, %r13 +; X64-NEXT: addq %rcx, %r13 ; X64-NEXT: adcq $0, %r15 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, %r11 -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r10, %r11 +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r15, %r10 -; X64-NEXT: setb %r8b +; X64-NEXT: adcq %r15, %rcx +; X64-NEXT: setb %r10b ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r15 -; X64-NEXT: mulq %rcx +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %r10, %rbp -; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: addq %rcx, %rbp +; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %r13 -; X64-NEXT: addq %rdi, %rbp +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload ; X64-NEXT: adcq %r12, %r13 -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: adcq $0, %rdi ; X64-NEXT: adcq $0, %r9 -; X64-NEXT: movq %r11, %r8 +; X64-NEXT: movq %r11, %r10 ; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq %r15, %rax ; X64-NEXT: movq %r15, %r12 -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %rdi, %r15 -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: addq %rcx, %r15 +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %r15, %rax ; X64-NEXT: movq %rax, %r15 -; X64-NEXT: adcq %r10, %r8 +; X64-NEXT: adcq %r8, %rcx ; X64-NEXT: setb %r10b ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: addq %r8, %r12 +; X64-NEXT: addq %rcx, %r12 ; X64-NEXT: movzbl %r10b, %eax -; X64-NEXT: adcq %rax, %rdi +; X64-NEXT: adcq %rax, %r8 ; X64-NEXT: addq %rbp, %r11 ; X64-NEXT: adcq %r13, %r15 ; X64-NEXT: movq %r15, %rbp ; X64-NEXT: adcq $0, %r12 -; X64-NEXT: adcq $0, %rdi -; X64-NEXT: addq %rsi, %r12 -; X64-NEXT: adcq %r9, %rdi -; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: movq %r15, %rax -; X64-NEXT: movq %rcx, %rsi -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: adcq %r9, %r8 +; X64-NEXT: setb %r9b ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; X64-NEXT: movq %r13, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rcx, %r8 +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %r15, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %r13, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: addq %r8, %rax -; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: movq %rax, %rdi ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r13, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq %r12, %r10 -; X64-NEXT: adcq %rdi, %r8 -; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; X64-NEXT: adcq %r8, %rdi +; X64-NEXT: movzbl %r9b, %ecx ; X64-NEXT: adcq %rcx, %rax ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: adcq $0, %rdx @@ -5381,350 +5359,342 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; X64-NEXT: adcq %rax, %r10 ; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq $0, %r8 -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq $0, %rdi +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: movq 64(%r9), %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq 64(%r10), %rsi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload ; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rcx, %r15 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rsi, %r8 -; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq 72(%r9), %rsi -; X64-NEXT: movq %r9, %rcx -; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rsi, %r15 +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rcx, %r9 +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: movq 72(%r10), %rcx +; X64-NEXT: movq %r10, %rsi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, %rdi +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r8, %rbx -; X64-NEXT: adcq %rdi, %r10 -; X64-NEXT: setb %r8b +; X64-NEXT: addq %r9, %rbx +; X64-NEXT: adcq %r8, %rcx +; X64-NEXT: setb %r10b ; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r13 -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdi, %r13 +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %r10, %r9 -; X64-NEXT: movzbl %r8b, %eax -; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; X64-NEXT: movq %r12, %rax -; X64-NEXT: movq %r15, %rdi +; X64-NEXT: addq %rcx, %r9 +; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: adcq %rax, %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, %rax ; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %r8, %r14 -; X64-NEXT: adcq $0, %r10 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; X64-NEXT: movq %r12, %rax -; X64-NEXT: movq %r12, %rdi +; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: addq %rcx, %r15 +; X64-NEXT: adcq $0, %r14 +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r13 -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: addq %r14, %rax +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: addq %r15, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r10, %r8 +; X64-NEXT: adcq %r14, %rcx ; X64-NEXT: setb %r10b -; X64-NEXT: movq %r15, %rax -; X64-NEXT: movq %r15, %r12 +; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %r8, %rbp +; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %r14 ; X64-NEXT: addq %r11, %rbp ; X64-NEXT: adcq %rbx, %r14 ; X64-NEXT: adcq $0, %r9 -; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movq 80(%rcx), %r15 -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: adcq $0, %rdi +; X64-NEXT: movq %rsi, %r10 +; X64-NEXT: movq 80(%rsi), %r15 +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %rsi ; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %r8, %r11 -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq 88(%rbx), %rbx -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rbx -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: addq %r11, %rax -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: adcq %r10, %r8 -; X64-NEXT: setb %r10b +; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: adcq $0, %r11 +; X64-NEXT: movq 88(%r10), %r10 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: addq %rbx, %rax +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: adcq %r11, %rcx +; X64-NEXT: setb %r11b ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rbx +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %r8, %r13 -; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: addq %rcx, %r13 +; X64-NEXT: movzbl %r11b, %eax ; X64-NEXT: adcq %rax, %r12 -; X64-NEXT: addq %rbp, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r14, %r11 -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %rbp, %rsi +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r14, %rbx +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %r13 ; X64-NEXT: adcq $0, %r12 ; X64-NEXT: addq %r9, %r13 -; X64-NEXT: adcq %rsi, %r12 -; X64-NEXT: setb %bpl +; X64-NEXT: adcq %rdi, %r12 +; X64-NEXT: setb %r9b +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %rdi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %rdi, %r10 +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: adcq $0, %r8 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, %r14 -; X64-NEXT: mulq %rbx -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: addq %r10, %rax -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: adcq %r8, %rdi +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: addq %rbx, %rax +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: adcq %r8, %rcx ; X64-NEXT: setb %r8b -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rbx -; X64-NEXT: addq %rdi, %rax +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r10 +; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %r8b, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: addq %r13, %rsi +; X64-NEXT: addq %r13, %rdi +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r12, %rsi ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r12, %r9 -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movzbl %bpl, %ecx +; X64-NEXT: movzbl %r9b, %ecx ; X64-NEXT: adcq %rcx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %rbx -; X64-NEXT: movq %rax, %r12 +; X64-NEXT: imulq %rax, %r10 +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rbx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: imulq %rcx, %r15 +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: imulq %rsi, %r15 +; X64-NEXT: addq %r10, %r15 ; X64-NEXT: addq %rdx, %r15 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: imulq %rsi, %r10 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %r10, %rdx +; X64-NEXT: movq %rax, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; X64-NEXT: imulq %r9, %rbx -; X64-NEXT: addq %rdx, %rbx -; X64-NEXT: addq %r8, %rdi -; X64-NEXT: adcq %r15, %rbx -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: imulq %rbx, %rdx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: imulq %rdi, %r10 +; X64-NEXT: addq %rdx, %r10 +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rdx, %r10 +; X64-NEXT: addq %rcx, %r8 +; X64-NEXT: adcq %r15, %r10 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %r15 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %r12 +; X64-NEXT: movq %rax, %r13 +; X64-NEXT: addq %r15, %r13 +; X64-NEXT: adcq $0, %r12 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %r8, %r15 -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %r15, %r11 -; X64-NEXT: adcq %r10, %r8 -; X64-NEXT: setb %r10b -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %rcx +; X64-NEXT: addq %r13, %r15 +; X64-NEXT: adcq %r12, %rbp +; X64-NEXT: setb %cl +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %r8, %r15 -; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: addq %rbp, %r12 +; X64-NEXT: movzbl %cl, %eax ; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: addq %rdi, %r15 -; X64-NEXT: adcq %rbx, %rsi +; X64-NEXT: addq %r8, %r12 +; X64-NEXT: adcq %r10, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: movq 120(%rdx), %rdi +; X64-NEXT: movq 120(%rdx), %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: imulq %rax, %rdi -; X64-NEXT: movq 112(%rdx), %rbx -; X64-NEXT: movq %rdx, %r12 -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: mulq %rbx +; X64-NEXT: imulq %rax, %rcx +; X64-NEXT: movq 112(%rdx), %r9 +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rdi, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: imulq %rcx, %rbx -; X64-NEXT: addq %rdx, %rbx -; X64-NEXT: movq 96(%r12), %r10 -; X64-NEXT: movq 104(%r12), %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; X64-NEXT: imulq %rbx, %r9 +; X64-NEXT: addq %rcx, %r9 +; X64-NEXT: addq %rdx, %r9 +; X64-NEXT: movq 96(%rdi), %rbp +; X64-NEXT: movq 104(%rdi), %rdi ; X64-NEXT: movq %r14, %rax -; X64-NEXT: movq %r14, %r12 -; X64-NEXT: imulq %rdi, %r12 -; X64-NEXT: mulq %r10 +; X64-NEXT: imulq %rdi, %rax +; X64-NEXT: imulq %rbp, %r11 +; X64-NEXT: addq %rax, %r11 +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %r12, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: imulq %r10, %r14 -; X64-NEXT: addq %rdx, %r14 +; X64-NEXT: addq %rdx, %r11 ; X64-NEXT: addq %r8, %r13 -; X64-NEXT: adcq %rbx, %r14 -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %rbp +; X64-NEXT: adcq %r9, %r11 +; X64-NEXT: movq %r11, %r14 +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rbp +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %r8, %r9 +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %r12 -; X64-NEXT: addq %r8, %r12 -; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r12, %rbx -; X64-NEXT: adcq %rbp, %r10 -; X64-NEXT: setb %r8b +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %r9, %r8 +; X64-NEXT: adcq %rcx, %rbp +; X64-NEXT: setb %cl ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: addq %r10, %rax -; X64-NEXT: movzbl %r8b, %edi -; X64-NEXT: adcq %rdi, %rdx +; X64-NEXT: mulq %rbx +; X64-NEXT: addq %rbp, %rax +; X64-NEXT: movzbl %cl, %ecx +; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq %r13, %rax ; X64-NEXT: adcq %r14, %rdx -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: adcq %r15, %rax +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq %r15, %r8 +; X64-NEXT: adcq %r12, %rax ; X64-NEXT: adcq %rsi, %rdx -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload -; X64-NEXT: movq 80(%r13), %r8 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq 80(%r8), %r9 +; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rax, %r10 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq 88(%r13), %r11 -; X64-NEXT: movq %r13, %r10 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq 88(%r8), %rbx +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: movq %rsi, %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: mulq %r8 +; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rdi, %r14 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %dil -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %r8, %r11 -; X64-NEXT: movq %rdx, %r13 +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rsi ; X64-NEXT: addq %rcx, %rsi ; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: adcq %rax, %r13 -; X64-NEXT: movq %r10, %rdi -; X64-NEXT: movq 64(%r10), %r10 -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: adcq %rax, %rbx +; X64-NEXT: movq 64(%r8), %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq 72(%rdi), %rax -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: mulq %r9 +; X64-NEXT: movq 72(%r8), %rax +; X64-NEXT: movq %rax, %r13 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %r15 ; X64-NEXT: movq %rax, %r12 ; X64-NEXT: addq %rcx, %r12 ; X64-NEXT: adcq $0, %r15 -; X64-NEXT: movq %r10, %rax -; X64-NEXT: movq %r11, %r9 -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %r12, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r15, %rcx ; X64-NEXT: setb %dil -; X64-NEXT: movq %r8, %r11 -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r13, %r11 +; X64-NEXT: movq %r13, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: adcq %rax, %r12 -; X64-NEXT: addq %rbx, %rbp +; X64-NEXT: addq %r10, %rbp ; X64-NEXT: adcq %r14, %r12 ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: adcq $0, %r13 -; X64-NEXT: movq %r10, %rdi -; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r10, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: mulq %r8 +; X64-NEXT: adcq $0, %rbx +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, %rbx ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rcx, %r14 ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %r8, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: addq %r14, %rax -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %r14, %r8 ; X64-NEXT: adcq %r10, %rcx ; X64-NEXT: setb %dil -; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r14 @@ -5733,26 +5703,26 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X64-NEXT: adcq %rax, %r10 ; X64-NEXT: addq %rbp, %r9 ; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r12, %r11 -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r12, %r8 +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %r14 ; X64-NEXT: adcq $0, %r10 ; X64-NEXT: addq %rsi, %r14 -; X64-NEXT: adcq %r13, %r10 +; X64-NEXT: adcq %rbx, %r10 ; X64-NEXT: setb %dil -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r12 +; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rcx, %r9 ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %r9, %rax @@ -5764,8 +5734,8 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: addq %r14, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %r14, %r8 +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r10, %r9 ; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movzbl %dil, %ecx @@ -5773,160 +5743,163 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq 96(%rdi), %rsi -; X64-NEXT: imulq %rsi, %r15 -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %r8, %rcx -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %r15, %rdx -; X64-NEXT: movq 104(%rdi), %r9 -; X64-NEXT: imulq %r9, %rcx -; X64-NEXT: addq %rdx, %rcx -; X64-NEXT: movq %rcx, %r14 -; X64-NEXT: movq 112(%rdi), %rax -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; X64-NEXT: imulq %r12, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq 120(%rdi), %rdi -; X64-NEXT: imulq %r15, %rdi -; X64-NEXT: addq %rdx, %rdi -; X64-NEXT: addq %r10, %r8 -; X64-NEXT: adcq %r14, %rdi -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq 96(%r8), %rcx +; X64-NEXT: imulq %rcx, %r15 +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %r13, %r9 +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: movq 104(%r8), %rdi +; X64-NEXT: imulq %rdi, %r9 +; X64-NEXT: addq %r15, %r9 +; X64-NEXT: addq %rdx, %r9 +; X64-NEXT: movq 112(%r8), %rax +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: imulq %r11, %rdx +; X64-NEXT: movq 120(%r8), %r8 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: imulq %r10, %r8 +; X64-NEXT: addq %rdx, %r8 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %r10, %r13 -; X64-NEXT: adcq $0, %r14 -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: addq %rdx, %r8 +; X64-NEXT: addq %rsi, %r13 +; X64-NEXT: adcq %r9, %r8 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r10, %r9 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %r13, %rbp -; X64-NEXT: adcq %r14, %rcx -; X64-NEXT: setb %sil -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: addq %rsi, %r10 +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: movq %r9, %rax +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: addq %r10, %r12 +; X64-NEXT: adcq %rcx, %rsi +; X64-NEXT: setb %cl +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %rcx, %r14 -; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: addq %rsi, %r14 +; X64-NEXT: movzbl %cl, %eax ; X64-NEXT: adcq %rax, %r10 -; X64-NEXT: addq %r8, %r14 -; X64-NEXT: adcq %rdi, %r10 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: addq %r13, %r14 +; X64-NEXT: adcq %r8, %r10 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: imulq %rax, %rsi +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: imulq %r15, %rdi -; X64-NEXT: movq %r15, %rax +; X64-NEXT: imulq %r15, %rcx +; X64-NEXT: addq %rsi, %rcx +; X64-NEXT: addq %rdx, %rcx +; X64-NEXT: movq %rcx, %r8 +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi +; X64-NEXT: imulq %rsi, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: imulq %rdi, %rbx +; X64-NEXT: addq %rax, %rbx +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %rdi, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; X64-NEXT: imulq %r12, %rsi -; X64-NEXT: addq %rdx, %rsi -; X64-NEXT: movq %rsi, %r8 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, %rsi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: imulq %r9, %rsi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: imulq %r11, %rbx ; X64-NEXT: addq %rdx, %rbx -; X64-NEXT: addq %rcx, %r13 +; X64-NEXT: addq %r9, %rcx ; X64-NEXT: adcq %r8, %rbx -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %rbx, %rbp +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq %rax, %r13 +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rsi, %rbx +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %r8, %rdi +; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: addq %r8, %rsi -; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rsi, %r11 -; X64-NEXT: adcq %rdi, %r8 +; X64-NEXT: addq %rdi, %r11 +; X64-NEXT: adcq %rsi, %r8 ; X64-NEXT: setb %sil -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: mulq %r15 ; X64-NEXT: addq %r8, %rax ; X64-NEXT: movzbl %sil, %esi ; X64-NEXT: adcq %rsi, %rdx -; X64-NEXT: addq %r13, %rax -; X64-NEXT: adcq %rbx, %rdx -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: adcq %rbp, %r11 +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rbp, %rdx +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: adcq %r12, %r11 ; X64-NEXT: adcq %r14, %rax ; X64-NEXT: adcq %r10, %rdx -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, %rdi +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: movq %rsi, %r8 -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload -; X64-NEXT: movq %rdi, %r9 +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, (%rsi) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 8(%rsi) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 16(%rsi) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 24(%rsi) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 32(%rsi) -; X64-NEXT: movq (%rsp), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 40(%rsi) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 48(%rsi) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 56(%rsi) -; X64-NEXT: movq %r8, 64(%rsi) -; X64-NEXT: movq %r9, 72(%rsi) -; X64-NEXT: movq %r10, 80(%rsi) -; X64-NEXT: movq %rbx, 88(%rsi) -; X64-NEXT: movq %rcx, 96(%rsi) -; X64-NEXT: movq %r11, 104(%rsi) -; X64-NEXT: movq %rax, 112(%rsi) -; X64-NEXT: movq %rdx, 120(%rsi) +; X64-NEXT: movq %rsi, (%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, 8(%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, 16(%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, 24(%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, 32(%rcx) +; X64-NEXT: movq (%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, 40(%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, 48(%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, 56(%rcx) +; X64-NEXT: movq %rdi, 64(%rcx) +; X64-NEXT: movq %r8, 72(%rcx) +; X64-NEXT: movq %r9, 80(%rcx) +; X64-NEXT: movq %r10, 88(%rcx) +; X64-NEXT: movq %r13, 96(%rcx) +; X64-NEXT: movq %r11, 104(%rcx) +; X64-NEXT: movq %rax, 112(%rcx) +; X64-NEXT: movq %rdx, 120(%rcx) ; X64-NEXT: addq $240, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/mul-i256.ll b/llvm/test/CodeGen/X86/mul-i256.ll index 9382278ff5c2d1..434d14ad4abeda 100644 --- a/llvm/test/CodeGen/X86/mul-i256.ll +++ b/llvm/test/CodeGen/X86/mul-i256.ll @@ -23,85 +23,85 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 { ; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 12(%ecx), %esi -; X32-NEXT: movl 8(%ecx), %ebx +; X32-NEXT: movl 12(%ecx), %ebp +; X32-NEXT: movl 8(%ecx), %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl (%eax), %ebx ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl (%eax), %edi -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 4(%eax), %ebp -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 4(%eax), %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi ; X32-NEXT: movl (%edi), %esi ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl 4(%edi), %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl 4(%edi), %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 8(%eax), %edi -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 8(%eax), %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -110,55 +110,54 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 { ; X32-NEXT: movl 12(%eax), %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: setb %bl +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ebx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl (%esp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edi, %ebx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %edx -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -168,43 +167,41 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 { ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 16(%ecx), %edi -; X32-NEXT: movl %ebx, %esi -; X32-NEXT: imull %edi, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: imull %edi, %ebx ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl 20(%ecx), %eax +; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: imull %eax, %ebp -; X32-NEXT: addl %edx, %ebp +; X32-NEXT: movl 20(%ecx), %ebp +; X32-NEXT: imull %ebp, %esi +; X32-NEXT: addl %ebx, %esi +; X32-NEXT: addl %edx, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl 24(%ecx), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: imull %esi, %edx ; X32-NEXT: movl 28(%ecx), %ecx -; X32-NEXT: imull %esi, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: adcl %ebp, %ecx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: mull %ebx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebx, %eax @@ -216,84 +213,84 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 { ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl 28(%edi), %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl 24(%edi), %ecx -; X32-NEXT: mull %ecx +; X32-NEXT: movl 28(%edi), %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %ecx +; X32-NEXT: movl 24(%edi), %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %esi, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl 16(%edi), %ebp +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %edx, %esi +; X32-NEXT: movl 16(%edi), %ecx ; X32-NEXT: movl 20(%edi), %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %edi -; X32-NEXT: imull %ebx, %edi +; X32-NEXT: movl %eax, %edx +; X32-NEXT: imull %ebx, %edx ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebp -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: addl %edx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: imull %ecx, %edi +; X32-NEXT: addl %edx, %edi +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edx, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %edi, %esi +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebx, %esi -; X32-NEXT: adcl %ecx, %edi -; X32-NEXT: setb %cl +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %cl, %ecx -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, (%ecx) +; X32-NEXT: movl %edi, (%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 4(%ecx) +; X32-NEXT: movl %edi, 4(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 8(%ecx) +; X32-NEXT: movl %edi, 8(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 12(%ecx) -; X32-NEXT: movl %ebx, 16(%ecx) -; X32-NEXT: movl %esi, 20(%ecx) -; X32-NEXT: movl %eax, 24(%ecx) -; X32-NEXT: movl %edx, 28(%ecx) +; X32-NEXT: movl %edi, 12(%esi) +; X32-NEXT: movl %ebx, 16(%esi) +; X32-NEXT: movl %ecx, 20(%esi) +; X32-NEXT: movl %eax, 24(%esi) +; X32-NEXT: movl %edx, 28(%esi) ; X32-NEXT: addl $72, %esp ; X32-NEXT: .cfi_def_cfa_offset 20 ; X32-NEXT: popl %esi @@ -312,9 +309,12 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 { ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: pushq %r14 ; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %r12 ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: .cfi_offset %rbx, -32 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: .cfi_offset %rbx, -40 +; X64-NEXT: .cfi_offset %r12, -32 ; X64-NEXT: .cfi_offset %r14, -24 ; X64-NEXT: .cfi_offset %r15, -16 ; X64-NEXT: movq %rdx, %rcx @@ -329,17 +329,17 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 { ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %r15, %rdx ; X64-NEXT: imulq %r14, %r10 +; X64-NEXT: addq %r15, %r10 ; X64-NEXT: addq %rdx, %r10 -; X64-NEXT: movq %r8, %r15 -; X64-NEXT: imulq %r11, %r15 +; X64-NEXT: movq %r8, %r12 +; X64-NEXT: imulq %r11, %r12 ; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %r15, %rdx ; X64-NEXT: movq 24(%rsi), %r15 ; X64-NEXT: imulq %rbx, %r15 +; X64-NEXT: addq %r12, %r15 ; X64-NEXT: addq %rdx, %r15 ; X64-NEXT: addq %rdi, %r8 ; X64-NEXT: adcq %r10, %r15 @@ -372,6 +372,8 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 { ; X64-NEXT: movq %rax, 16(%rcx) ; X64-NEXT: movq %rdx, 24(%rcx) ; X64-NEXT: popq %rbx +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: popq %r12 ; X64-NEXT: .cfi_def_cfa_offset 24 ; X64-NEXT: popq %r14 ; X64-NEXT: .cfi_def_cfa_offset 16 diff --git a/llvm/test/CodeGen/X86/mul-i512.ll b/llvm/test/CodeGen/X86/mul-i512.ll index 08d0f7cd082207..f5254ed37b53d7 100644 --- a/llvm/test/CodeGen/X86/mul-i512.ll +++ b/llvm/test/CodeGen/X86/mul-i512.ll @@ -33,7 +33,6 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %ecx, %ebp -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -47,73 +46,74 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 16(%ecx), %ebp -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 16(%ecx), %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl 20(%ecx), %ebx -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl 20(%ecx), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %cl -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl %cl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ebp, %esi +; X32-NEXT: movzbl %cl, %eax +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl 8(%edi), %ebp -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 8(%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl 12(%edi), %ecx -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 12(%eax), %ecx +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: adcl %ebp, %edi -; X32-NEXT: setb %bl +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -125,7 +125,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp @@ -145,7 +145,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %ecx, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -154,9 +154,9 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 8(%ecx), %ebx -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 8(%ecx), %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -166,19 +166,20 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %ebx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload @@ -191,66 +192,66 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl 4(%ecx), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %ebx, %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %esi -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: adcl %ebp, %edi +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %edi, %ebp +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebp +; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill @@ -262,27 +263,28 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: movl %edx, %esi ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %ebp, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: movl %ebp, %edx ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %eax @@ -295,111 +297,109 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 16(%eax), %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl 16(%eax), %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 20(%eax), %ecx -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %ecx, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %ecx, %esi ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 24(%eax), %ebx -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: movl 24(%eax), %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 28(%eax), %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl 28(%eax), %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: adcl %ebp, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %ebx, %ebp -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ebx, %ebp ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edx -; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill @@ -407,31 +407,29 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %eax @@ -447,21 +445,20 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl %edi, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx @@ -470,81 +467,80 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edi, %ebx +; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: addl %ebx, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -553,58 +549,59 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %edi, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movl %edx, %esi +; X32-NEXT: adcl %eax, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %edi, %edx ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: adcl $0, %edx +; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %ebx +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 32(%eax), %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -612,7 +609,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi @@ -624,7 +621,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl 36(%eax), %ecx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl %ecx, %esi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax @@ -652,7 +649,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax @@ -665,14 +662,14 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %esi, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 40(%eax), %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi @@ -711,7 +708,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl (%esp), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -743,56 +740,53 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %edi, %esi -; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %eax, %edi +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NEXT: imull %edi, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %edx +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: imull %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ecx, %edi +; X32-NEXT: addl %edx, %edi ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edx, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: movl %ebx, %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -801,62 +795,60 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx ; X32-NEXT: movl 56(%edi), %esi -; X32-NEXT: movl %edi, %ebx ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl 48(%edi), %edi -; X32-NEXT: movl 52(%ebx), %ebp +; X32-NEXT: movl 48(%edi), %ecx +; X32-NEXT: movl 52(%edi), %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: imull %ebp, %ebx -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %edi, %ecx -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %edx +; X32-NEXT: imull %ebx, %edx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: imull %ecx, %edi +; X32-NEXT: addl %edx, %edi ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %edx, %edi +; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %esi, %ebp -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -888,10 +880,10 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 32(%ecx), %esi ; X32-NEXT: movl %esi, %eax @@ -925,8 +917,8 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -959,20 +951,20 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload @@ -980,60 +972,60 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %edi, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl %ecx, %ebp -; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 48(%ecx), %edi +; X32-NEXT: movl 48(%ecx), %ebp ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: imull %edi, %esi -; X32-NEXT: movl %edi, %eax +; X32-NEXT: imull %ebp, %esi +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl 52(%ecx), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: imull %eax, %ebx +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: addl %edx, %ebx ; X32-NEXT: movl 56(%ecx), %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: imull %ebp, %esi +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: imull %ecx, %edx ; X32-NEXT: movl 60(%esi), %esi -; X32-NEXT: imull %ecx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: imull %edi, %esi +; X32-NEXT: addl %edx, %esi +; X32-NEXT: mull %edi ; X32-NEXT: addl %edx, %esi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi @@ -1050,43 +1042,43 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: imull %eax, %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %edi, %ebp +; X32-NEXT: addl %edx, %ebp ; X32-NEXT: mull %edi +; X32-NEXT: addl %edx, %ebp +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: imull %ebp, %edi -; X32-NEXT: addl %edx, %edi -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: imull %edi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: adcl %ecx, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %edi @@ -1105,9 +1097,9 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -1179,271 +1171,270 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind { ; X64-NEXT: pushq %r13 ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx -; X64-NEXT: pushq %rax -; X64-NEXT: movq %rdx, (%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq (%rdi), %rbx +; X64-NEXT: movq (%rdi), %r14 ; X64-NEXT: movq 8(%rdi), %r9 -; X64-NEXT: movq 24(%rdi), %r12 -; X64-NEXT: movq 16(%rdi), %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq (%rsi), %rcx -; X64-NEXT: movq 8(%rsi), %r11 -; X64-NEXT: movq %rsi, %rdi -; X64-NEXT: movq %r14, %rax -; X64-NEXT: movq %rcx, %rsi -; X64-NEXT: mulq %rcx +; X64-NEXT: movq 24(%rdi), %r15 +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq (%rsi), %rdi +; X64-NEXT: movq 8(%rsi), %rbx +; X64-NEXT: movq %rsi, %r12 +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r12, %rax -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r15 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: addq %rcx, %r10 ; X64-NEXT: adcq $0, %r8 -; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %r10, %rcx -; X64-NEXT: adcq %r8, %r14 +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: mulq %rbx +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: addq %r10, %r11 +; X64-NEXT: adcq %r8, %rcx ; X64-NEXT: setb %al ; X64-NEXT: movzbl %al, %esi -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %r14, %r10 +; X64-NEXT: addq %rcx, %r10 ; X64-NEXT: adcq %rsi, %r13 -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %r14, %rsi +; X64-NEXT: movq %r14, %rax +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %r14, %r15 -; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: addq %rcx, %r15 +; X64-NEXT: adcq $0, %r14 +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rsi, %r8 +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %rbx +; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: addq %r15, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %rbx +; X64-NEXT: adcq %r14, %rbp ; X64-NEXT: setb %sil +; X64-NEXT: movq %r9, %rdi ; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rbx, %rbp +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; X64-NEXT: adcq %rcx, %r14 +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq %r11, %r14 ; X64-NEXT: adcq $0, %r10 ; X64-NEXT: adcq $0, %r13 -; X64-NEXT: movq %rdi, %rsi -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq 16(%rdi), %r8 -; X64-NEXT: movq %r12, %r11 +; X64-NEXT: movq %r12, %r9 ; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq %r9, %rax -; X64-NEXT: movq %r9, %r12 -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %rcx, %r15 -; X64-NEXT: adcq $0, %rbx -; X64-NEXT: movq 24(%rsi), %rsi -; X64-NEXT: movq %r11, %rax +; X64-NEXT: movq 16(%r12), %rsi +; X64-NEXT: movq %r8, %rbx +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %r12 +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %r11, %rbp +; X64-NEXT: adcq $0, %r15 +; X64-NEXT: movq 24(%r9), %rdi +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %r15, %r11 -; X64-NEXT: adcq %rbx, %r9 -; X64-NEXT: setb %bl +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %rbp, %rbx +; X64-NEXT: adcq %r15, %r9 +; X64-NEXT: setb %bpl ; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rsi +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %r9, %rcx -; X64-NEXT: movzbl %bl, %eax +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: addq %r9, %r11 +; X64-NEXT: movzbl %bpl, %eax ; X64-NEXT: adcq %rax, %r15 -; X64-NEXT: addq %rbp, %rdi -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r14, %r11 -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq $0, %rcx +; X64-NEXT: addq %rcx, %r8 +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r14, %rbx +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq $0, %r11 ; X64-NEXT: adcq $0, %r15 -; X64-NEXT: addq %r10, %rcx +; X64-NEXT: addq %r10, %r11 ; X64-NEXT: adcq %r13, %r15 -; X64-NEXT: setb %r12b -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %r11 +; X64-NEXT: setb %bpl ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rdi, %rbx -; X64-NEXT: adcq $0, %r9 -; X64-NEXT: movq %r14, %rax ; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: addq %rbx, %rax +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: adcq %r9, %rbp -; X64-NEXT: setb %dil -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; X64-NEXT: movq %r13, %rax ; X64-NEXT: mulq %rsi -; X64-NEXT: addq %rbp, %rax -; X64-NEXT: movzbl %dil, %edi -; X64-NEXT: adcq %rdi, %rdx -; X64-NEXT: addq %rcx, %r11 -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r15, %rbx +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rcx, %r9 +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: addq %r9, %rax +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: adcq %r8, %rcx +; X64-NEXT: setb %r8b +; X64-NEXT: movq %r13, %rax +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rcx, %r14 +; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: adcq %rax, %rdx +; X64-NEXT: addq %r11, %rbx ; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movzbl %r12b, %ecx -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r15, %r9 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movzbl %bpl, %eax +; X64-NEXT: adcq %rax, %r14 ; X64-NEXT: adcq $0, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq 32(%rcx), %r15 -; X64-NEXT: imulq %r15, %rsi -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: movq 32(%rcx), %r11 +; X64-NEXT: imulq %r11, %rdi +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq 40(%rcx), %rsi -; X64-NEXT: imulq %rsi, %r8 -; X64-NEXT: addq %rdx, %r8 +; X64-NEXT: movq 40(%rcx), %r8 +; X64-NEXT: imulq %r8, %rsi +; X64-NEXT: addq %rdi, %rsi +; X64-NEXT: addq %rdx, %rsi ; X64-NEXT: movq 48(%rcx), %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: imulq %r14, %rdi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; X64-NEXT: mulq %rbx +; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %rdi, %rdx -; X64-NEXT: movq 56(%r11), %r11 -; X64-NEXT: imulq %rbx, %r11 -; X64-NEXT: addq %rdx, %r11 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: imulq %r10, %rcx +; X64-NEXT: movq 56(%rdx), %rbp +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: imulq %rdi, %rbp +; X64-NEXT: addq %rcx, %rbp +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: addq %rdx, %rbp ; X64-NEXT: addq %r9, %rcx -; X64-NEXT: adcq %r8, %r11 -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %rbx, %r8 -; X64-NEXT: mulq %r15 +; X64-NEXT: adcq %rsi, %rbp +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %rsi +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %r15 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rdi, %rbx +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: addq %rdi, %r11 ; X64-NEXT: adcq $0, %r9 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %rbx, %r13 +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq %r9, %r15 -; X64-NEXT: setb %dil -; X64-NEXT: movq %r14, %rax -; X64-NEXT: mulq %rsi +; X64-NEXT: setb %sil +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %r15, %r8 -; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: adcq %rax, %r12 ; X64-NEXT: addq %rcx, %r8 -; X64-NEXT: adcq %r11, %r12 +; X64-NEXT: adcq %rbp, %r12 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq 56(%rcx), %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: imulq %rax, %rsi ; X64-NEXT: movq 48(%rcx), %r11 ; X64-NEXT: movq %rcx, %rdi -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: imulq %r14, %r11 +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: imulq %rcx, %r11 +; X64-NEXT: addq %rsi, %r11 ; X64-NEXT: addq %rdx, %r11 ; X64-NEXT: movq 32(%rdi), %r9 -; X64-NEXT: movq 40(%rdi), %r15 +; X64-NEXT: movq 40(%rdi), %rdi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: imulq %r15, %rsi +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: imulq %rdi, %rdx +; X64-NEXT: imulq %r9, %r13 +; X64-NEXT: addq %rdx, %r13 ; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: imulq %r9, %r10 -; X64-NEXT: addq %rdx, %r10 -; X64-NEXT: addq %rcx, %rdi -; X64-NEXT: adcq %r11, %r10 +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: addq %rdx, %r13 +; X64-NEXT: addq %r10, %r15 +; X64-NEXT: adcq %r11, %r13 ; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rbx -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %rbx +; X64-NEXT: mulq %rbp +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rsi, %rbx +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: addq %r10, %r11 ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %rbx, %r9 -; X64-NEXT: adcq %rbp, %rsi -; X64-NEXT: setb %bl -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %r14 -; X64-NEXT: addq %rsi, %rax -; X64-NEXT: movzbl %bl, %esi -; X64-NEXT: adcq %rsi, %rdx -; X64-NEXT: addq %rdi, %rax -; X64-NEXT: adcq %r10, %rdx -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: adcq %r13, %r9 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rcx, %r10 +; X64-NEXT: movq %rdx, %r9 +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: addq %r11, %rcx +; X64-NEXT: adcq %rbp, %r9 +; X64-NEXT: setb %r11b +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %r10 +; X64-NEXT: addq %r9, %rax +; X64-NEXT: movzbl %r11b, %edi +; X64-NEXT: adcq %rdi, %rdx +; X64-NEXT: addq %r15, %rax +; X64-NEXT: adcq %r13, %rdx +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq %rbx, %rcx ; X64-NEXT: adcq %r8, %rax ; X64-NEXT: adcq %r12, %rdx -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq %r14, %rax ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload -; X64-NEXT: movq (%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, (%rsi) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 8(%rsi) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 16(%rsi) ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, 24(%rsi) -; X64-NEXT: movq %rcx, 32(%rsi) -; X64-NEXT: movq %r9, 40(%rsi) -; X64-NEXT: movq %rax, 48(%rsi) -; X64-NEXT: movq %rdx, 56(%rsi) -; X64-NEXT: addq $8, %rsp +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, (%rdi) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, 8(%rdi) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, 16(%rdi) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, 24(%rdi) +; X64-NEXT: movq %rsi, 32(%rdi) +; X64-NEXT: movq %rcx, 40(%rdi) +; X64-NEXT: movq %rax, 48(%rdi) +; X64-NEXT: movq %rdx, 56(%rdi) ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 ; X64-NEXT: popq %r13 diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll index 1edd1fa4373c80..0bc23af920eed4 100644 --- a/llvm/test/CodeGen/X86/mul128.ll +++ b/llvm/test/CodeGen/X86/mul128.ll @@ -9,8 +9,8 @@ define i128 @foo(i128 %t, i128 %u) { ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: mulq %rdx -; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: imulq %rsi, %r8 +; X64-NEXT: addq %rcx, %r8 ; X64-NEXT: addq %r8, %rdx ; X64-NEXT: retq ; @@ -30,57 +30,56 @@ define i128 @foo(i128 %t, i128 %u) { ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: imull %ecx, %ebp -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: imull %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: imull %ebp, %esi +; X86-NEXT: addl %eax, %esi +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %ecx ; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %edx, %esi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %edi, %edx +; X86-NEXT: imull {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: imull %esi, %edi -; X86-NEXT: addl %edx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: addl %ebp, %edi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: imull %ebp, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi -; X86-NEXT: addl %ecx, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %esi, %ecx +; X86-NEXT: imull %edi, %ecx +; X86-NEXT: addl %edx, %ecx +; X86-NEXT: mull %edi ; X86-NEXT: addl %edx, %ecx ; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: adcl %edi, %ecx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %edi +; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %edi, %esi -; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %edi +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: addl %esi, %ebp -; X86-NEXT: adcl %ebx, %edi +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %ebp, %edi +; X86-NEXT: adcl %ebx, %esi ; X86-NEXT: setb %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %edi, %eax +; X86-NEXT: addl %esi, %eax ; X86-NEXT: movzbl %bl, %esi ; X86-NEXT: adcl %esi, %edx ; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ebp, 4(%ecx) +; X86-NEXT: movl %edi, 4(%ecx) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, (%ecx) ; X86-NEXT: movl %eax, 8(%ecx) diff --git a/llvm/test/CodeGen/X86/mul64.ll b/llvm/test/CodeGen/X86/mul64.ll index 1feed4b207a239..d2afb2c529e426 100644 --- a/llvm/test/CodeGen/X86/mul64.ll +++ b/llvm/test/CodeGen/X86/mul64.ll @@ -11,8 +11,8 @@ define i64 @foo(i64 %t, i64 %u) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: imull {{[0-9]+}}(%esp), %ecx -; X32-NEXT: addl %ecx, %edx ; X32-NEXT: imull {{[0-9]+}}(%esp), %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: addl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: retl diff --git a/llvm/test/CodeGen/X86/muloti.ll b/llvm/test/CodeGen/X86/muloti.ll index 9a6cf0b0656626..60a2e21dcd03d9 100644 --- a/llvm/test/CodeGen/X86/muloti.ll +++ b/llvm/test/CodeGen/X86/muloti.ll @@ -22,8 +22,8 @@ define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nou ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: mulq %rbx ; CHECK-NEXT: movq %rax, %r8 -; CHECK-NEXT: addq %rdi, %rdx ; CHECK-NEXT: imulq %rcx, %rbx +; CHECK-NEXT: addq %rdi, %rbx ; CHECK-NEXT: addq %rdx, %rbx ; CHECK-NEXT: movq %rcx, %rdi ; CHECK-NEXT: sarq $63, %rdi @@ -32,8 +32,8 @@ define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nou ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: mulq %r9 ; CHECK-NEXT: movq %rax, %r10 -; CHECK-NEXT: addq %r14, %rdx ; CHECK-NEXT: imulq %r9, %rdi +; CHECK-NEXT: addq %r14, %rdi ; CHECK-NEXT: addq %rdx, %rdi ; CHECK-NEXT: addq %r8, %r10 ; CHECK-NEXT: adcq %rbx, %rdi diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll index 43110af64c77e0..fb222cd2b2e106 100644 --- a/llvm/test/CodeGen/X86/popcnt.ll +++ b/llvm/test/CodeGen/X86/popcnt.ll @@ -352,23 +352,23 @@ define i128 @cnt128(i128 %x) nounwind readnone { ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F ; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %edx -; X86-NOSSE-NEXT: movl %ecx, %edi -; X86-NOSSE-NEXT: shrl %edi -; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %edi, %ecx -; X86-NOSSE-NEXT: movl %ecx, %edi -; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %esi, %edx +; X86-NOSSE-NEXT: movl %ecx, %esi +; X86-NOSSE-NEXT: shrl %esi +; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %esi, %ecx +; X86-NOSSE-NEXT: movl %ecx, %esi +; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 ; X86-NOSSE-NEXT: shrl $2, %ecx ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X86-NOSSE-NEXT: addl %edi, %ecx -; X86-NOSSE-NEXT: movl %ecx, %edi -; X86-NOSSE-NEXT: shrl $4, %edi -; X86-NOSSE-NEXT: addl %ecx, %edi -; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101 +; X86-NOSSE-NEXT: addl %esi, %ecx +; X86-NOSSE-NEXT: movl %ecx, %esi +; X86-NOSSE-NEXT: shrl $4, %esi +; X86-NOSSE-NEXT: addl %ecx, %esi +; X86-NOSSE-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %esi, %ecx # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %ecx ; X86-NOSSE-NEXT: addl %edx, %ecx -; X86-NOSSE-NEXT: addl %esi, %ecx ; X86-NOSSE-NEXT: movl %ecx, (%eax) ; X86-NOSSE-NEXT: movl $0, 12(%eax) ; X86-NOSSE-NEXT: movl $0, 8(%eax) @@ -420,20 +420,18 @@ define i128 @cnt128(i128 %x) nounwind readnone { ; ; X86-POPCNT-LABEL: cnt128: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: pushl %esi ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx ; X86-POPCNT-NEXT: addl %ecx, %edx ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi -; X86-POPCNT-NEXT: addl %ecx, %esi -; X86-POPCNT-NEXT: addl %edx, %esi -; X86-POPCNT-NEXT: movl %esi, (%eax) +; X86-POPCNT-NEXT: addl %edx, %ecx +; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X86-POPCNT-NEXT: addl %ecx, %edx +; X86-POPCNT-NEXT: movl %edx, (%eax) ; X86-POPCNT-NEXT: movl $0, 12(%eax) ; X86-POPCNT-NEXT: movl $0, 8(%eax) ; X86-POPCNT-NEXT: movl $0, 4(%eax) -; X86-POPCNT-NEXT: popl %esi ; X86-POPCNT-NEXT: retl $4 ; ; X64-POPCNT-LABEL: cnt128: @@ -800,83 +798,80 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOSSE-NEXT: movl %ebx, %ecx -; X86-NOSSE-NEXT: shrl %ecx -; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %edi, %ecx -; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %ecx, %ebx -; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 -; X86-NOSSE-NEXT: movl %ebx, %ebp -; X86-NOSSE-NEXT: andl %ecx, %ebp +; X86-NOSSE-NEXT: movl %ebx, %eax +; X86-NOSSE-NEXT: shrl %eax +; X86-NOSSE-NEXT: movl $1431655765, %ecx # imm = 0x55555555 +; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: subl %eax, %ebx +; X86-NOSSE-NEXT: movl $858993459, %eax # imm = 0x33333333 +; X86-NOSSE-NEXT: movl %ebx, %edi +; X86-NOSSE-NEXT: andl %eax, %edi ; X86-NOSSE-NEXT: shrl $2, %ebx -; X86-NOSSE-NEXT: andl %ecx, %ebx -; X86-NOSSE-NEXT: addl %ebp, %ebx -; X86-NOSSE-NEXT: movl %ebx, %ebp -; X86-NOSSE-NEXT: shrl $4, %ebp -; X86-NOSSE-NEXT: addl %ebx, %ebp -; X86-NOSSE-NEXT: movl %eax, %ebx +; X86-NOSSE-NEXT: andl %eax, %ebx +; X86-NOSSE-NEXT: addl %edi, %ebx +; X86-NOSSE-NEXT: movl %ebx, %edi +; X86-NOSSE-NEXT: shrl $4, %edi +; X86-NOSSE-NEXT: addl %ebx, %edi +; X86-NOSSE-NEXT: movl %esi, %ebx ; X86-NOSSE-NEXT: shrl %ebx -; X86-NOSSE-NEXT: andl %edi, %ebx -; X86-NOSSE-NEXT: subl %ebx, %eax -; X86-NOSSE-NEXT: movl %eax, %ebx ; X86-NOSSE-NEXT: andl %ecx, %ebx -; X86-NOSSE-NEXT: shrl $2, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax -; X86-NOSSE-NEXT: addl %ebx, %eax -; X86-NOSSE-NEXT: movl %eax, %edi -; X86-NOSSE-NEXT: shrl $4, %edi -; X86-NOSSE-NEXT: addl %eax, %edi -; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: andl %ebx, %ebp -; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %eax -; X86-NOSSE-NEXT: andl %ebx, %edi -; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %edi -; X86-NOSSE-NEXT: addl %eax, %edi -; X86-NOSSE-NEXT: movl %esi, %eax -; X86-NOSSE-NEXT: shrl %eax -; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %ebp, %eax -; X86-NOSSE-NEXT: subl %eax, %esi -; X86-NOSSE-NEXT: movl %esi, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: subl %ebx, %esi +; X86-NOSSE-NEXT: movl %esi, %ebx +; X86-NOSSE-NEXT: andl %eax, %ebx ; X86-NOSSE-NEXT: shrl $2, %esi -; X86-NOSSE-NEXT: andl %ecx, %esi -; X86-NOSSE-NEXT: addl %eax, %esi -; X86-NOSSE-NEXT: movl %esi, %ebp -; X86-NOSSE-NEXT: shrl $4, %ebp -; X86-NOSSE-NEXT: addl %esi, %ebp -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: shrl %eax -; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %esi, %eax -; X86-NOSSE-NEXT: subl %eax, %edx -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: andl %eax, %esi +; X86-NOSSE-NEXT: addl %ebx, %esi +; X86-NOSSE-NEXT: movl %esi, %ebx +; X86-NOSSE-NEXT: shrl $4, %ebx +; X86-NOSSE-NEXT: addl %esi, %ebx +; X86-NOSSE-NEXT: movl $252645135, %esi # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: andl %esi, %edi +; X86-NOSSE-NEXT: imull $16843009, %edi, %ebp # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %ebp +; X86-NOSSE-NEXT: andl %esi, %ebx +; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %edi +; X86-NOSSE-NEXT: addl %ebp, %edi +; X86-NOSSE-NEXT: movl %edx, %ebx +; X86-NOSSE-NEXT: shrl %ebx +; X86-NOSSE-NEXT: andl %ecx, %ebx +; X86-NOSSE-NEXT: subl %ebx, %edx +; X86-NOSSE-NEXT: movl %edx, %ebx +; X86-NOSSE-NEXT: andl %eax, %ebx ; X86-NOSSE-NEXT: shrl $2, %edx -; X86-NOSSE-NEXT: andl %ecx, %edx -; X86-NOSSE-NEXT: addl %eax, %edx -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: shrl $4, %eax -; X86-NOSSE-NEXT: addl %edx, %eax -; X86-NOSSE-NEXT: andl %ebx, %ebp -; X86-NOSSE-NEXT: andl %ebx, %eax -; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %ecx -; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101 +; X86-NOSSE-NEXT: andl %eax, %edx +; X86-NOSSE-NEXT: addl %ebx, %edx +; X86-NOSSE-NEXT: movl %edx, %ebp +; X86-NOSSE-NEXT: shrl $4, %ebp +; X86-NOSSE-NEXT: addl %edx, %ebp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOSSE-NEXT: andl %esi, %ebp +; X86-NOSSE-NEXT: imull $16843009, %ebp, %edx # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %edx -; X86-NOSSE-NEXT: addl %ecx, %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: addl %edi, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: movl %ecx, 12(%eax) -; X86-NOSSE-NEXT: movl %ecx, 8(%eax) -; X86-NOSSE-NEXT: movl %ecx, 4(%eax) -; X86-NOSSE-NEXT: movl %edx, (%eax) +; X86-NOSSE-NEXT: movl %ebx, %edi +; X86-NOSSE-NEXT: shrl %edi +; X86-NOSSE-NEXT: andl %ecx, %edi +; X86-NOSSE-NEXT: subl %edi, %ebx +; X86-NOSSE-NEXT: movl %ebx, %ecx +; X86-NOSSE-NEXT: andl %eax, %ecx +; X86-NOSSE-NEXT: shrl $2, %ebx +; X86-NOSSE-NEXT: andl %eax, %ebx +; X86-NOSSE-NEXT: addl %ecx, %ebx +; X86-NOSSE-NEXT: movl %ebx, %ecx +; X86-NOSSE-NEXT: shrl $4, %ecx +; X86-NOSSE-NEXT: addl %ebx, %ecx +; X86-NOSSE-NEXT: andl %esi, %ecx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %ecx +; X86-NOSSE-NEXT: addl %edx, %ecx +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: movl %edx, 12(%eax) +; X86-NOSSE-NEXT: movl %edx, 8(%eax) +; X86-NOSSE-NEXT: movl %edx, 4(%eax) +; X86-NOSSE-NEXT: movl %ecx, (%eax) ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %edi ; X86-NOSSE-NEXT: popl %ebx @@ -925,21 +920,19 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; ; X86-POPCNT-LABEL: cnt128_optsize: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: pushl %esi ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx ; X86-POPCNT-NEXT: addl %ecx, %edx ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi -; X86-POPCNT-NEXT: addl %ecx, %esi -; X86-POPCNT-NEXT: addl %edx, %esi +; X86-POPCNT-NEXT: addl %edx, %ecx +; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X86-POPCNT-NEXT: addl %ecx, %edx ; X86-POPCNT-NEXT: xorl %ecx, %ecx ; X86-POPCNT-NEXT: movl %ecx, 12(%eax) ; X86-POPCNT-NEXT: movl %ecx, 8(%eax) ; X86-POPCNT-NEXT: movl %ecx, 4(%eax) -; X86-POPCNT-NEXT: movl %esi, (%eax) -; X86-POPCNT-NEXT: popl %esi +; X86-POPCNT-NEXT: movl %edx, (%eax) ; X86-POPCNT-NEXT: retl $4 ; ; X64-POPCNT-LABEL: cnt128_optsize: @@ -1230,83 +1223,80 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOSSE-NEXT: movl %ebx, %ecx -; X86-NOSSE-NEXT: shrl %ecx -; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %edi, %ecx -; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %ecx, %ebx -; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 -; X86-NOSSE-NEXT: movl %ebx, %ebp -; X86-NOSSE-NEXT: andl %ecx, %ebp +; X86-NOSSE-NEXT: movl %ebx, %eax +; X86-NOSSE-NEXT: shrl %eax +; X86-NOSSE-NEXT: movl $1431655765, %ecx # imm = 0x55555555 +; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: subl %eax, %ebx +; X86-NOSSE-NEXT: movl $858993459, %eax # imm = 0x33333333 +; X86-NOSSE-NEXT: movl %ebx, %edi +; X86-NOSSE-NEXT: andl %eax, %edi ; X86-NOSSE-NEXT: shrl $2, %ebx -; X86-NOSSE-NEXT: andl %ecx, %ebx -; X86-NOSSE-NEXT: addl %ebp, %ebx -; X86-NOSSE-NEXT: movl %ebx, %ebp -; X86-NOSSE-NEXT: shrl $4, %ebp -; X86-NOSSE-NEXT: addl %ebx, %ebp -; X86-NOSSE-NEXT: movl %eax, %ebx +; X86-NOSSE-NEXT: andl %eax, %ebx +; X86-NOSSE-NEXT: addl %edi, %ebx +; X86-NOSSE-NEXT: movl %ebx, %edi +; X86-NOSSE-NEXT: shrl $4, %edi +; X86-NOSSE-NEXT: addl %ebx, %edi +; X86-NOSSE-NEXT: movl %esi, %ebx ; X86-NOSSE-NEXT: shrl %ebx -; X86-NOSSE-NEXT: andl %edi, %ebx -; X86-NOSSE-NEXT: subl %ebx, %eax -; X86-NOSSE-NEXT: movl %eax, %ebx ; X86-NOSSE-NEXT: andl %ecx, %ebx -; X86-NOSSE-NEXT: shrl $2, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax -; X86-NOSSE-NEXT: addl %ebx, %eax -; X86-NOSSE-NEXT: movl %eax, %edi -; X86-NOSSE-NEXT: shrl $4, %edi -; X86-NOSSE-NEXT: addl %eax, %edi -; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: andl %ebx, %ebp -; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %eax -; X86-NOSSE-NEXT: andl %ebx, %edi -; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %edi -; X86-NOSSE-NEXT: addl %eax, %edi -; X86-NOSSE-NEXT: movl %esi, %eax -; X86-NOSSE-NEXT: shrl %eax -; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %ebp, %eax -; X86-NOSSE-NEXT: subl %eax, %esi -; X86-NOSSE-NEXT: movl %esi, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: subl %ebx, %esi +; X86-NOSSE-NEXT: movl %esi, %ebx +; X86-NOSSE-NEXT: andl %eax, %ebx ; X86-NOSSE-NEXT: shrl $2, %esi -; X86-NOSSE-NEXT: andl %ecx, %esi -; X86-NOSSE-NEXT: addl %eax, %esi -; X86-NOSSE-NEXT: movl %esi, %ebp -; X86-NOSSE-NEXT: shrl $4, %ebp -; X86-NOSSE-NEXT: addl %esi, %ebp -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: shrl %eax -; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %esi, %eax -; X86-NOSSE-NEXT: subl %eax, %edx -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: andl %eax, %esi +; X86-NOSSE-NEXT: addl %ebx, %esi +; X86-NOSSE-NEXT: movl %esi, %ebx +; X86-NOSSE-NEXT: shrl $4, %ebx +; X86-NOSSE-NEXT: addl %esi, %ebx +; X86-NOSSE-NEXT: movl $252645135, %esi # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: andl %esi, %edi +; X86-NOSSE-NEXT: imull $16843009, %edi, %ebp # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %ebp +; X86-NOSSE-NEXT: andl %esi, %ebx +; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %edi +; X86-NOSSE-NEXT: addl %ebp, %edi +; X86-NOSSE-NEXT: movl %edx, %ebx +; X86-NOSSE-NEXT: shrl %ebx +; X86-NOSSE-NEXT: andl %ecx, %ebx +; X86-NOSSE-NEXT: subl %ebx, %edx +; X86-NOSSE-NEXT: movl %edx, %ebx +; X86-NOSSE-NEXT: andl %eax, %ebx ; X86-NOSSE-NEXT: shrl $2, %edx -; X86-NOSSE-NEXT: andl %ecx, %edx -; X86-NOSSE-NEXT: addl %eax, %edx -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: shrl $4, %eax -; X86-NOSSE-NEXT: addl %edx, %eax -; X86-NOSSE-NEXT: andl %ebx, %ebp -; X86-NOSSE-NEXT: andl %ebx, %eax -; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %ecx -; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101 +; X86-NOSSE-NEXT: andl %eax, %edx +; X86-NOSSE-NEXT: addl %ebx, %edx +; X86-NOSSE-NEXT: movl %edx, %ebp +; X86-NOSSE-NEXT: shrl $4, %ebp +; X86-NOSSE-NEXT: addl %edx, %ebp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOSSE-NEXT: andl %esi, %ebp +; X86-NOSSE-NEXT: imull $16843009, %ebp, %edx # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %edx -; X86-NOSSE-NEXT: addl %ecx, %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: addl %edi, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: movl %ecx, 12(%eax) -; X86-NOSSE-NEXT: movl %ecx, 8(%eax) -; X86-NOSSE-NEXT: movl %ecx, 4(%eax) -; X86-NOSSE-NEXT: movl %edx, (%eax) +; X86-NOSSE-NEXT: movl %ebx, %edi +; X86-NOSSE-NEXT: shrl %edi +; X86-NOSSE-NEXT: andl %ecx, %edi +; X86-NOSSE-NEXT: subl %edi, %ebx +; X86-NOSSE-NEXT: movl %ebx, %ecx +; X86-NOSSE-NEXT: andl %eax, %ecx +; X86-NOSSE-NEXT: shrl $2, %ebx +; X86-NOSSE-NEXT: andl %eax, %ebx +; X86-NOSSE-NEXT: addl %ecx, %ebx +; X86-NOSSE-NEXT: movl %ebx, %ecx +; X86-NOSSE-NEXT: shrl $4, %ecx +; X86-NOSSE-NEXT: addl %ebx, %ecx +; X86-NOSSE-NEXT: andl %esi, %ecx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %ecx +; X86-NOSSE-NEXT: addl %edx, %ecx +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: movl %edx, 12(%eax) +; X86-NOSSE-NEXT: movl %edx, 8(%eax) +; X86-NOSSE-NEXT: movl %edx, 4(%eax) +; X86-NOSSE-NEXT: movl %ecx, (%eax) ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %edi ; X86-NOSSE-NEXT: popl %ebx @@ -1355,21 +1345,19 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { ; ; X86-POPCNT-LABEL: cnt128_pgso: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: pushl %esi ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx ; X86-POPCNT-NEXT: addl %ecx, %edx ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi -; X86-POPCNT-NEXT: addl %ecx, %esi -; X86-POPCNT-NEXT: addl %edx, %esi +; X86-POPCNT-NEXT: addl %edx, %ecx +; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X86-POPCNT-NEXT: addl %ecx, %edx ; X86-POPCNT-NEXT: xorl %ecx, %ecx ; X86-POPCNT-NEXT: movl %ecx, 12(%eax) ; X86-POPCNT-NEXT: movl %ecx, 8(%eax) ; X86-POPCNT-NEXT: movl %ecx, 4(%eax) -; X86-POPCNT-NEXT: movl %esi, (%eax) -; X86-POPCNT-NEXT: popl %esi +; X86-POPCNT-NEXT: movl %edx, (%eax) ; X86-POPCNT-NEXT: retl $4 ; ; X64-POPCNT-LABEL: cnt128_pgso: diff --git a/llvm/test/CodeGen/X86/pr34080-2.ll b/llvm/test/CodeGen/X86/pr34080-2.ll index de34bfb13159ca..1705edab9e82b0 100644 --- a/llvm/test/CodeGen/X86/pr34080-2.ll +++ b/llvm/test/CodeGen/X86/pr34080-2.ll @@ -31,10 +31,6 @@ define void @computeJD(ptr) nounwind { ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: imull %edx ; CHECK-NEXT: movl %edx, %edi -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: shrl $31, %eax -; CHECK-NEXT: sarl $7, %edi -; CHECK-NEXT: addl %eax, %edi ; CHECK-NEXT: imull $36525, %esi, %eax # imm = 0x8EAD ; CHECK-NEXT: addl $172251900, %eax # imm = 0xA445AFC ; CHECK-NEXT: movl $1374389535, %edx # imm = 0x51EB851F @@ -43,7 +39,11 @@ define void @computeJD(ptr) nounwind { ; CHECK-NEXT: shrl $31, %eax ; CHECK-NEXT: sarl $5, %edx ; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $31, %eax ; CHECK-NEXT: addl 16(%ebx), %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: sarl $7, %edi ; CHECK-NEXT: addl %edi, %ecx ; CHECK-NEXT: leal 257(%ecx,%edx), %eax ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) diff --git a/llvm/test/CodeGen/X86/pr36865.ll b/llvm/test/CodeGen/X86/pr36865.ll index 580426a069f41b..35d644027e1b0c 100644 --- a/llvm/test/CodeGen/X86/pr36865.ll +++ b/llvm/test/CodeGen/X86/pr36865.ll @@ -13,10 +13,10 @@ define void @main() { ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movl (%rax), %ecx ; CHECK-NEXT: addl 0, %eax -; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: addl %ecx, %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %ecx +; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: imull %eax, %ecx diff --git a/llvm/test/CodeGen/X86/reassociate-add.ll b/llvm/test/CodeGen/X86/reassociate-add.ll index 402cd5166e3627..76f5535a3beb1c 100644 --- a/llvm/test/CodeGen/X86/reassociate-add.ll +++ b/llvm/test/CodeGen/X86/reassociate-add.ll @@ -12,12 +12,12 @@ define void @add8(i8 %x0, i8 %x1, i8 %x2, i8* %p) { ; CHECK: # %bb.0: ; CHECK-NEXT: orb $16, %dil ; CHECK-NEXT: orb $32, %sil +; CHECK-NEXT: addb %dil, %sil ; CHECK-NEXT: addb $-8, %dl ; CHECK-NEXT: orb $7, %dl ; CHECK-NEXT: movzbl %dl, %eax ; CHECK-NEXT: imull $100, %eax, %eax ; CHECK-NEXT: addb %sil, %al -; CHECK-NEXT: addb %dil, %al ; CHECK-NEXT: movb %al, (%rcx) ; CHECK-NEXT: retq %v0 = or i8 %x0, 16 @@ -36,11 +36,11 @@ define void @add16(i16 %x0, i16 %x1, i16 %x2, i16* %p) { ; CHECK: # %bb.0: ; CHECK-NEXT: orl $16, %edi ; CHECK-NEXT: orl $32, %esi +; CHECK-NEXT: addl %edi, %esi ; CHECK-NEXT: addl $-8, %edx ; CHECK-NEXT: orl $7, %edx ; CHECK-NEXT: imull $100, %edx, %eax ; CHECK-NEXT: addl %esi, %eax -; CHECK-NEXT: addl %edi, %eax ; CHECK-NEXT: movw %ax, (%rcx) ; CHECK-NEXT: retq %v0 = or i16 %x0, 16 @@ -59,11 +59,11 @@ define void @add32(i32 %x0, i32 %x1, i32 %x2, i32* %p) { ; CHECK: # %bb.0: ; CHECK-NEXT: orl $16, %edi ; CHECK-NEXT: orl $32, %esi +; CHECK-NEXT: addl %edi, %esi ; CHECK-NEXT: addl $-8, %edx ; CHECK-NEXT: orl $7, %edx ; CHECK-NEXT: imull $100, %edx, %eax ; CHECK-NEXT: addl %esi, %eax -; CHECK-NEXT: addl %edi, %eax ; CHECK-NEXT: movl %eax, (%rcx) ; CHECK-NEXT: retq %v0 = or i32 %x0, 16 @@ -82,11 +82,11 @@ define void @add64(i64 %x0, i64 %x1, i64 %x2, i64* %p) { ; CHECK: # %bb.0: ; CHECK-NEXT: orq $16, %rdi ; CHECK-NEXT: orq $32, %rsi +; CHECK-NEXT: addq %rdi, %rsi ; CHECK-NEXT: addq $-8, %rdx ; CHECK-NEXT: orq $7, %rdx ; CHECK-NEXT: imulq $100, %rdx, %rax ; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: addq %rdi, %rax ; CHECK-NEXT: movq %rax, (%rcx) ; CHECK-NEXT: retq %v0 = or i64 %x0, 16 diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll b/llvm/test/CodeGen/X86/smul-with-overflow.ll index 83802ce4344263..83b9460c7dae33 100644 --- a/llvm/test/CodeGen/X86/smul-with-overflow.ll +++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll @@ -212,6 +212,7 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %ebx ; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %eax, %ecx @@ -233,16 +234,14 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %ebp -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %eax, %esi ; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %edx, %edi ; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ecx +; X86-NEXT: mull %ebp ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %eax, %edi @@ -251,26 +250,27 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: addl %eax, %ebx ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %edx, %eax -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: adcl $0, %edx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %eax, %edx ; X86-NEXT: adcl $0, %esi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: setb %al -; X86-NEXT: addl %ebp, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %ecx, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl %edi, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: adcl %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -294,76 +294,76 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: setb %bl ; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %esi ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %eax, %edx -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: adcl %eax, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, %edi ; X86-NEXT: addl %ecx, %edi -; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: addl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %esi, %ebp +; X86-NEXT: adcl %ebx, %ebp ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %ebp, %esi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ebp, %ebx ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: adcl (%esp), %edx # 4-byte Folded Reload ; X86-NEXT: movl %edx, (%esp) # 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: mull %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebx +; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %edi, %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %edi, %esi ; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movl %eax, %esi ; X86-NEXT: adcl %ebp, %ecx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %ecx, %ebp ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebp -; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %edi +; X86-NEXT: adcl %eax, %edi +; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: adcl (%esp), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %ebp -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: adcl $0, %edi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: setb (%esp) # 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -386,182 +386,176 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: addl %ecx, %ebx ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X86-NEXT: adcl %eax, %edx +; X86-NEXT: adcl %eax, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: addl %edi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: adcl %ebp, %esi +; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: adcl %edi, %edx ; X86-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload ; X86-NEXT: adcl %eax, %ebx -; X86-NEXT: adcl $0, %edx +; X86-NEXT: adcl $0, %esi ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %eax, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %ecx +; X86-NEXT: adcl $0, %ebx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %ebx, %ecx +; X86-NEXT: adcl %edi, %ebx ; X86-NEXT: setb %al -; X86-NEXT: addl %edi, %ecx +; X86-NEXT: addl %esi, %ebx ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: adcl %edx, %eax ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %ebp -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %edi, %esi -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: addl %ebp, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %edi, %ebx -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: addl %eax, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X86-NEXT: adcl %edx, %esi +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: adcl $0, %edx +; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %esi, %edx +; X86-NEXT: setb %cl +; X86-NEXT: addl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl %cl, %esi +; X86-NEXT: adcl %edi, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: addl %ebx, %edi +; X86-NEXT: addl %edx, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl %esi, %eax -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: adcl $0, %esi ; X86-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NEXT: adcl $0, %edx ; X86-NEXT: addl %ebp, %edi ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: adcl %esi, %eax +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $0, %eax -; X86-NEXT: addl %ebx, %edi +; X86-NEXT: addl %esi, %ecx ; X86-NEXT: adcl %edx, %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: setb %al -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movzbl %al, %edi -; X86-NEXT: adcl %ecx, %edi -; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-NEXT: adcl $0, %eax -; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: setb %dl +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: adcl %ebx, %esi +; X86-NEXT: movl (%esp), %ebx # 4-byte Reload ; X86-NEXT: adcl $0, %ebx -; X86-NEXT: adcl $0, %edx -; X86-NEXT: adcl $0, %edi -; X86-NEXT: adcl $0, %eax -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: adcl $0, %eax +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: adcl $0, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: movl %eax, %edi ; X86-NEXT: movl %eax, %ebp -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: adcl $0, %edi -; X86-NEXT: addl %eax, %ecx -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %edx, %edi +; X86-NEXT: addl %edx, %ebp +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: addl %eax, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %edx, %ebx ; X86-NEXT: setb %cl -; X86-NEXT: addl %eax, %edi +; X86-NEXT: addl %eax, %ebx ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %edx, %ecx ; X86-NEXT: movl %eax, %edx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %edi, %edx +; X86-NEXT: addl %ebx, %edx +; X86-NEXT: adcl %ecx, %ebp +; X86-NEXT: movl %ebx, %esi ; X86-NEXT: movl %ebx, %eax -; X86-NEXT: adcl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, %esi -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl %ecx, %ebx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: addl %ebp, %edx +; X86-NEXT: addl %edi, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: adcl %ebx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: adcl %edi, %ebp ; X86-NEXT: movl %eax, %edx -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: adcl $0, %eax ; X86-NEXT: addl %esi, %edx ; X86-NEXT: adcl %ecx, %eax -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: setb %bl ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl %ecx, %edx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %edx -; X86-NEXT: adcl %esi, %edx -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X86-NEXT: adcl %ebx, %eax +; X86-NEXT: adcl %edi, %edx +; X86-NEXT: movzbl %bl, %eax +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X86-NEXT: adcl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %ebx @@ -623,51 +617,49 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: imull %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: imull %eax, %ecx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: imull %eax, %ecx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: imull %eax, %edx -; X86-NEXT: movl %eax, %ebx ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: addl %esi, %eax -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: adcl %ebx, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: imull %ebx -; X86-NEXT: addl %eax, %eax +; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %eax, %ebx ; X86-NEXT: adcl %edx, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: adcl %ebx, %edx +; X86-NEXT: adcl %ebx, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: adcl %edx, %ebx ; X86-NEXT: addl %edi, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl (%esp), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -721,18 +713,17 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %edx, %ecx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: imull %edx, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: imull %edx, %ecx ; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: imull %edx, %ebp -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X86-NEXT: imull {{[0-9]+}}(%esp), %edx ; X86-NEXT: addl %ebp, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: addl %eax, %ebx @@ -841,188 +832,190 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X64-NEXT: pushq %r13 ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx -; X64-NEXT: movq %r9, %r10 -; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: movq %rsi, %rbx +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r8, %r12 +; X64-NEXT: movq %rcx, %r15 +; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rsi, %r8 ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx -; X64-NEXT: andl $1, %ecx -; X64-NEXT: negq %rcx -; X64-NEXT: andl $1, %r12d -; X64-NEXT: negq %r12 -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rbp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: andl $1, %esi +; X64-NEXT: negq %rsi +; X64-NEXT: andl $1, %r15d +; X64-NEXT: negq %r15 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rax, %rbp ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: movq %rdx, %r9 +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %rdx, %r15 -; X64-NEXT: adcq $0, %r9 -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r10 -; X64-NEXT: movq %r10, %r14 +; X64-NEXT: addq %rdx, %rbx +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %rax, %r15 -; X64-NEXT: adcq %rdx, %r9 +; X64-NEXT: addq %rax, %rbx +; X64-NEXT: adcq %rdx, %rcx ; X64-NEXT: setb %dil -; X64-NEXT: movzbl %dil, %r10d -; X64-NEXT: addq %rax, %r9 -; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %rbp -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movzbl %dil, %r13d +; X64-NEXT: addq %rax, %rcx +; X64-NEXT: adcq %rdx, %r13 ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %rbp +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rsi, %r11 -; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %rbx, %r13 -; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: addq %rdi, %r11 +; X64-NEXT: adcq $0, %r10 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: addq %r11, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rdi, %rbx -; X64-NEXT: setb %dil -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movzbl %dil, %edx -; X64-NEXT: adcq %rdx, %rsi -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; X64-NEXT: adcq %r10, %rdi +; X64-NEXT: setb %r11b +; X64-NEXT: movq %r14, %rax +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: movzbl %r11b, %edx +; X64-NEXT: adcq %rdx, %r10 +; X64-NEXT: addq %rbp, %rax ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: adcq %r15, %rsi -; X64-NEXT: adcq $0, %r9 -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r13 -; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: adcq %rbx, %r10 +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: adcq $0, %r13 +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %r8 +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: mulq %r14 +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %rax, %r9 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r11, %rax ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %r11, %rbx -; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: addq %r11, %rbp +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq $0, %r15 -; X64-NEXT: addq %r13, %rbx -; X64-NEXT: adcq %r11, %r15 +; X64-NEXT: adcq $0, %r11 +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %r8, %rbp +; X64-NEXT: adcq %rax, %r11 ; X64-NEXT: setb %al -; X64-NEXT: addq %r8, %r15 -; X64-NEXT: movzbl %al, %r8d -; X64-NEXT: adcq %rdx, %r8 -; X64-NEXT: addq %r13, %rdi +; X64-NEXT: addq %r9, %r11 +; X64-NEXT: movzbl %al, %r14d +; X64-NEXT: adcq %rdx, %r14 +; X64-NEXT: addq %r8, %rdi ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rsi, %rbx -; X64-NEXT: adcq $0, %r15 -; X64-NEXT: adcq $0, %r8 -; X64-NEXT: addq %r9, %r15 -; X64-NEXT: adcq %r10, %r8 -; X64-NEXT: setb %r10b -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rdx, %r11 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: adcq $0, %rdi -; X64-NEXT: addq %rax, %r11 -; X64-NEXT: adcq %rdx, %rdi -; X64-NEXT: setb %r9b -; X64-NEXT: addq %rax, %rdi -; X64-NEXT: movzbl %r9b, %esi -; X64-NEXT: adcq %rdx, %rsi -; X64-NEXT: addq %rax, %r15 -; X64-NEXT: adcq %r8, %r11 -; X64-NEXT: movzbl %r10b, %eax -; X64-NEXT: adcq %rax, %rdi -; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: movq %rsi, %r8 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload -; X64-NEXT: addq %r14, %r8 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; X64-NEXT: adcq %r10, %rbp +; X64-NEXT: adcq $0, %r11 +; X64-NEXT: adcq $0, %r14 +; X64-NEXT: addq %rcx, %r11 +; X64-NEXT: adcq %r13, %r14 +; X64-NEXT: setb %dil +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %rdx, %rbx ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: adcq $0, %r10 +; X64-NEXT: addq %rax, %rbx +; X64-NEXT: adcq %rdx, %r10 +; X64-NEXT: setb %cl +; X64-NEXT: addq %rax, %r10 +; X64-NEXT: movzbl %cl, %ecx +; X64-NEXT: adcq %rdx, %rcx +; X64-NEXT: addq %rax, %r11 +; X64-NEXT: adcq %r14, %rbx +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: adcq %rax, %r10 +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: addq %rax, %r14 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: movq %r8, %rcx +; X64-NEXT: adcq $0, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: addq %r9, %r8 -; X64-NEXT: adcq %r14, %r10 +; X64-NEXT: addq %r9, %r14 +; X64-NEXT: adcq %rax, %rcx +; X64-NEXT: movq %rax, %rdx ; X64-NEXT: setb %al -; X64-NEXT: addq %rsi, %r10 -; X64-NEXT: movzbl %al, %esi -; X64-NEXT: adcq %rdx, %rsi +; X64-NEXT: addq %rdi, %rcx +; X64-NEXT: movzbl %al, %edi +; X64-NEXT: adcq %r8, %rdi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: imulq %r12, %rax -; X64-NEXT: addq %r14, %rax -; X64-NEXT: imulq %r12, %rbp -; X64-NEXT: addq %rax, %rbp -; X64-NEXT: movq %r12, %rax -; X64-NEXT: imulq %rcx -; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r9, %r12 +; X64-NEXT: imulq %r15, %rax +; X64-NEXT: imulq %r15, %r12 ; X64-NEXT: addq %rax, %r12 -; X64-NEXT: adcq %rdx, %rbp -; X64-NEXT: addq %r10, %r12 -; X64-NEXT: adcq %rsi, %rbp -; X64-NEXT: movq %r13, %r14 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: addq %rsi, %r14 -; X64-NEXT: adcq $0, %rsi +; X64-NEXT: addq %rdx, %r12 +; X64-NEXT: movq %r15, %rax +; X64-NEXT: imulq %rsi +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r9, %r8 +; X64-NEXT: addq %rax, %r8 +; X64-NEXT: adcq %rdx, %r12 +; X64-NEXT: addq %rcx, %r8 +; X64-NEXT: adcq %rdi, %r12 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; X64-NEXT: movq %r13, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: addq %rcx, %r15 +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: addq %rdi, %r15 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: addq %rdx, %r14 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: adcq %r9, %rsi -; X64-NEXT: setb %r10b +; X64-NEXT: adcq %rdx, %rcx +; X64-NEXT: setb %r9b +; X64-NEXT: addq %rdi, %rcx +; X64-NEXT: movzbl %r9b, %edi +; X64-NEXT: adcq %rdx, %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; X64-NEXT: imulq %rsi, %rdx +; X64-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: addq %rdx, %rsi -; X64-NEXT: movzbl %r10b, %r10d -; X64-NEXT: adcq %r9, %r10 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: imulq %rcx, %r9 -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; X64-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: addq %r9, %rcx +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: addq %r13, %rax -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: addq %rsi, %rax -; X64-NEXT: adcq %r10, %rcx +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rdi, %rsi ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; X64-NEXT: adcq %r8, %r14 -; X64-NEXT: adcq %r12, %rax -; X64-NEXT: adcq %rbp, %rcx -; X64-NEXT: addq %r15, %r13 -; X64-NEXT: adcq %r11, %r14 -; X64-NEXT: adcq %rdi, %rax -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rbx, %rdx -; X64-NEXT: sarq $63, %rdx -; X64-NEXT: xorq %rdx, %rcx -; X64-NEXT: xorq %rdx, %r14 -; X64-NEXT: orq %rcx, %r14 -; X64-NEXT: xorq %rdx, %rax -; X64-NEXT: orq %r14, %rax -; X64-NEXT: xorq %r13, %rdx -; X64-NEXT: orq %rax, %rdx +; X64-NEXT: adcq %r14, %r15 +; X64-NEXT: adcq %r8, %rax +; X64-NEXT: adcq %r12, %rsi +; X64-NEXT: addq %r11, %r13 +; X64-NEXT: adcq %rbx, %r15 +; X64-NEXT: adcq %r10, %rax +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: movq %rbp, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: xorq %rcx, %rsi +; X64-NEXT: xorq %rcx, %r15 +; X64-NEXT: orq %rsi, %r15 +; X64-NEXT: xorq %rcx, %rax +; X64-NEXT: orq %r15, %rax +; X64-NEXT: xorq %r13, %rcx +; X64-NEXT: orq %rax, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movl %eax, %esi ; X64-NEXT: andl $1, %esi -; X64-NEXT: movq %rsi, %rcx -; X64-NEXT: negq %rcx -; X64-NEXT: xorq %rcx, %rbx -; X64-NEXT: xorq %rax, %rcx -; X64-NEXT: orq %rbx, %rcx -; X64-NEXT: orq %rdx, %rcx +; X64-NEXT: movq %rsi, %rdx +; X64-NEXT: negq %rdx +; X64-NEXT: xorq %rdx, %rbp +; X64-NEXT: xorq %rax, %rdx +; X64-NEXT: orq %rbp, %rdx +; X64-NEXT: orq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, 8(%rax) diff --git a/llvm/test/CodeGen/X86/smul_fix.ll b/llvm/test/CodeGen/X86/smul_fix.ll index 8c2b945d6a8ce1..fd25842180922e 100644 --- a/llvm/test/CodeGen/X86/smul_fix.ll +++ b/llvm/test/CodeGen/X86/smul_fix.ll @@ -231,8 +231,8 @@ define i64 @func5(i64 %x, i64 %y) { ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %esi ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl %ecx, %edx ; X86-NEXT: imull {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %ecx, %esi ; X86-NEXT: addl %esi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll index 996601ed3be644..0532916b1e4ca3 100644 --- a/llvm/test/CodeGen/X86/smul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll @@ -369,8 +369,8 @@ define i64 @func5(i64 %x, i64 %y) { ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: pushl %esi ; X86-NEXT: .cfi_def_cfa_offset 20 -; X86-NEXT: subl $8, %esp -; X86-NEXT: .cfi_def_cfa_offset 28 +; X86-NEXT: subl $12, %esp +; X86-NEXT: .cfi_def_cfa_offset 32 ; X86-NEXT: .cfi_offset %esi, -20 ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 @@ -383,62 +383,62 @@ define i64 @func5(i64 %x, i64 %y) { ; X86-NEXT: movl %eax, %edi ; X86-NEXT: imull %ebx, %edi ; X86-NEXT: mull %ebx -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: addl %edi, %edx -; X86-NEXT: movl %ebp, %edi +; X86-NEXT: movl %eax, %esi ; X86-NEXT: imull %ebp, %ebx +; X86-NEXT: addl %edi, %ebx ; X86-NEXT: addl %edx, %ebx +; X86-NEXT: movl %ebp, %edi ; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl %edi, %ebp -; X86-NEXT: imull %ecx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edi, %edx +; X86-NEXT: imull %ecx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %edi, %eax -; X86-NEXT: mull %esi -; X86-NEXT: addl %ebp, %edx -; X86-NEXT: imull %esi, %edi +; X86-NEXT: imull %ebp, %edi +; X86-NEXT: addl %edx, %edi +; X86-NEXT: mull %ebp ; X86-NEXT: addl %edx, %edi -; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-NEXT: addl %esi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl %ebx, %edi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: addl %eax, %ebp -; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: addl %eax, %ebx +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: mull %edx +; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %eax, %ebp -; X86-NEXT: adcl %ebx, %esi -; X86-NEXT: setb %bl +; X86-NEXT: addl %eax, %ebx +; X86-NEXT: adcl %ebp, %esi +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: mull %ebp ; X86-NEXT: addl %esi, %eax -; X86-NEXT: movzbl %bl, %esi +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X86-NEXT: adcl %esi, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: adcl %edi, %edx -; X86-NEXT: movl %ebp, %edi +; X86-NEXT: movl %ebx, %edi ; X86-NEXT: sarl $31, %edi ; X86-NEXT: xorl %edi, %edx ; X86-NEXT: xorl %eax, %edi -; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: xorl %ebp, %ecx ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: movl %ecx, %esi ; X86-NEXT: xorl $2147483647, %esi # imm = 0x7FFFFFFF ; X86-NEXT: orl %edx, %edi ; X86-NEXT: notl %ecx -; X86-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovel %ebp, %esi +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: cmovel %ebx, %esi ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl %esi, %edx -; X86-NEXT: addl $8, %esp +; X86-NEXT: addl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 20 ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 16 diff --git a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll index 367ca660cda14f..fc40c539f37c78 100644 --- a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll @@ -23,8 +23,8 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %rdi, %rdx ; X64-NEXT: imulq %rcx, %r14 +; X64-NEXT: addq %rdi, %r14 ; X64-NEXT: addq %rdx, %r14 ; X64-NEXT: movq %rcx, %rdi ; X64-NEXT: sarq $63, %rdi @@ -33,8 +33,8 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %r15, %rdx ; X64-NEXT: imulq %r10, %rdi +; X64-NEXT: addq %r15, %rdi ; X64-NEXT: addq %rdx, %rdi ; X64-NEXT: addq %r9, %r11 ; X64-NEXT: adcq %r14, %rdi @@ -84,8 +84,8 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: pushl %esi ; X86-NEXT: .cfi_def_cfa_offset 20 -; X86-NEXT: subl $56, %esp -; X86-NEXT: .cfi_def_cfa_offset 76 +; X86-NEXT: subl $52, %esp +; X86-NEXT: .cfi_def_cfa_offset 72 ; X86-NEXT: .cfi_offset %esi, -20 ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 @@ -102,20 +102,19 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %edi ; X86-NEXT: addl %ecx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: adcl $0, %esi ; X86-NEXT: movl %ebx, %eax -; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ebp -; X86-NEXT: setb %bl +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %edi, %ebp +; X86-NEXT: adcl %esi, %ebx +; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ecx -; X86-NEXT: addl %ebp, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill -; X86-NEXT: movzbl %bl, %eax +; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -127,198 +126,197 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %esi, %ebp +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %esi, %ebx ; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %edx, %esi +; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edi, %ebx +; X86-NEXT: adcl %edi, %esi ; X86-NEXT: setb %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: mull %edx ; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ebx, %edi +; X86-NEXT: addl %esi, %edi ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: adcl %ebp, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %ebx, %ecx -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: adcl %ebp, %esi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: adcl %esi, %ebx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %esi, %ecx +; X86-NEXT: addl %ebx, %ecx ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebp +; X86-NEXT: adcl %eax, %esi ; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: adcl $0, %ebp +; X86-NEXT: adcl $0, %esi ; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; X86-NEXT: setb (%esp) ## 1-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: mull %edx +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ebx -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: adcl %edi, %ebx -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movl %eax, %edi +; X86-NEXT: adcl %ebx, %ebp +; X86-NEXT: setb %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl %ebp, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movzbl (%esp), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %eax, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %edi, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: sarl $31, %esi ; X86-NEXT: movl %esi, %edi ; X86-NEXT: imull {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: mull %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: mull %ebp ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %edi, %edx +; X86-NEXT: imull %esi, %ebp +; X86-NEXT: addl %edi, %ebp +; X86-NEXT: addl %edx, %ebp +; X86-NEXT: movl %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: imull %eax, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: imull %esi, %ebx ; X86-NEXT: addl %edx, %ebx -; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill -; X86-NEXT: movl %esi, %ebx -; X86-NEXT: imull {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %edx, %ebx -; X86-NEXT: imull %esi, %ebp -; X86-NEXT: addl %ebx, %ebp -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: addl %eax, %ecx -; X86-NEXT: adcl (%esp), %ebp ## 4-byte Folded Reload -; X86-NEXT: movl %ebp, (%esp) ## 4-byte Spill +; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %ebp, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi ; X86-NEXT: movl %eax, %esi ; X86-NEXT: addl %edi, %esi ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: adcl $0, %ebp -; X86-NEXT: addl %ebx, %esi +; X86-NEXT: addl %ecx, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl %edi, %ebp ; X86-NEXT: setb %bl ; X86-NEXT: addl %eax, %ebp ; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %edx, %eax -; X86-NEXT: addl %ecx, %ebp -; X86-NEXT: adcl (%esp), %eax ## 4-byte Folded Reload +; X86-NEXT: addl (%esp), %ebp ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %esi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: mull %ecx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %edx, %esi -; X86-NEXT: imull %ebx, %ecx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: imull %ebx, %edi -; X86-NEXT: addl %ecx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: imull %ebx, %ecx -; X86-NEXT: mull %ebx ; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill -; X86-NEXT: addl %ecx, %edx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: imull %esi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: imull %esi, %ebx +; X86-NEXT: addl %ecx, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %ebx, %eax -; X86-NEXT: addl %edx, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload -; X86-NEXT: addl %ecx, (%esp) ## 4-byte Folded Spill -; X86-NEXT: adcl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: addl %esi, %edi -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: imull %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: imull %esi, %ecx +; X86-NEXT: addl %edx, %ecx +; X86-NEXT: mull %esi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %edx, %ecx +; X86-NEXT: addl %edi, %ebx +; X86-NEXT: movl (%esp), %eax ## 4-byte Reload +; X86-NEXT: addl %eax, %esi +; X86-NEXT: adcl %ebx, %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %edi, %ebx +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload ; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %eax, %ebx +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: addl %eax, %edi -; X86-NEXT: adcl %edx, %esi -; X86-NEXT: setb %bl -; X86-NEXT: addl %eax, %esi -; X86-NEXT: movzbl %bl, %eax +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload ; X86-NEXT: adcl %edx, %eax -; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: adcl %ebp, %esi +; X86-NEXT: addl %esi, %edi +; X86-NEXT: adcl %ecx, %eax +; X86-NEXT: movl (%esp), %esi ## 4-byte Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: adcl %ebp, %edi ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: xorl %edx, %eax -; X86-NEXT: xorl %edx, %edi -; X86-NEXT: orl %eax, %edi -; X86-NEXT: xorl %edx, %esi -; X86-NEXT: xorl %ecx, %edx -; X86-NEXT: orl %esi, %edx -; X86-NEXT: orl %edi, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: xorl %ecx, %ebx +; X86-NEXT: orl %eax, %ebx +; X86-NEXT: xorl %ecx, %edi +; X86-NEXT: xorl %esi, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: orl %ebx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebx, 12(%eax) +; X86-NEXT: movl %edx, 12(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload @@ -326,7 +324,7 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: setne %al -; X86-NEXT: addl $56, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -360,8 +358,9 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X64-NEXT: .cfi_offset %r14, -32 ; X64-NEXT: .cfi_offset %r15, -24 ; X64-NEXT: .cfi_offset %rbp, -16 +; X64-NEXT: movq %r8, %rbx ; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rsi, %r15 ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill @@ -372,31 +371,29 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %rsi, %r10 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rsi, %r8 ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %r12 -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %r10, %r14 +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: addq %r8, %r10 ; X64-NEXT: adcq %rcx, %r12 ; X64-NEXT: setb %al ; X64-NEXT: movzbl %al, %ecx ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r12, %rbx -; X64-NEXT: adcq %rcx, %r11 +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %r12, %r14 +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %r8, %rcx -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %rcx +; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: addq %r8, %r13 @@ -405,69 +402,69 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X64-NEXT: movq %r9, %rsi ; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: adcq %r12, %r10 +; X64-NEXT: adcq %r12, %r11 ; X64-NEXT: setb %cl ; X64-NEXT: movq %r15, %r9 ; X64-NEXT: movq %r15, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %r10, %r8 +; X64-NEXT: addq %r11, %r8 ; X64-NEXT: movzbl %cl, %eax ; X64-NEXT: adcq %rax, %rbp ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Folded Reload -; X64-NEXT: adcq %r14, %rbp -; X64-NEXT: adcq $0, %rbx -; X64-NEXT: adcq $0, %r11 -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: adcq %r10, %rbp +; X64-NEXT: adcq $0, %r14 +; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Spill +; X64-NEXT: movq %rdi, %rcx ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, %r14 +; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rax, %rdi ; X64-NEXT: movq %r9, %rax ; X64-NEXT: movq %r9, %rsi ; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %r10, %r9 +; X64-NEXT: addq %r11, %r9 ; X64-NEXT: adcq $0, %r13 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: addq %r9, %rax -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: adcq %r13, %r11 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: adcq %r13, %r10 ; X64-NEXT: setb %cl ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: addq %r11, %r13 +; X64-NEXT: addq %r10, %r13 ; X64-NEXT: movzbl %cl, %eax -; X64-NEXT: adcq %rax, %r10 -; X64-NEXT: addq %r8, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: adcq %rbp, %rdi +; X64-NEXT: adcq %rax, %r11 +; X64-NEXT: addq %r8, %rdi ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: adcq %rbp, %r9 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; X64-NEXT: adcq $0, %r13 -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: addq %rbx, %r13 -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 ## 8-byte Folded Reload -; X64-NEXT: setb %cl +; X64-NEXT: adcq $0, %r11 +; X64-NEXT: addq %r14, %r13 +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 ## 8-byte Folded Reload +; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Folded Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 ## 8-byte Reload ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload -; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 ## 8-byte Reload +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r8 @@ -476,118 +473,117 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: addq %r8, %rax ; X64-NEXT: movq %rax, %rsi +; X64-NEXT: addq %r8, %rsi ; X64-NEXT: adcq %rdi, %r9 ; X64-NEXT: setb %r8b -; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %r9, %r14 ; X64-NEXT: movzbl %r8b, %eax ; X64-NEXT: adcq %rax, %rbp -; X64-NEXT: addq %r13, %r11 -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: adcq %r10, %rsi +; X64-NEXT: addq %r13, %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: adcq %r11, %rsi ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 1-byte Folded Reload ; X64-NEXT: adcq %rax, %r14 ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rbx, %r13 -; X64-NEXT: movq %rbx, %r10 +; X64-NEXT: movq %r10, %r13 ; X64-NEXT: sarq $63, %r13 ; X64-NEXT: movq %r13, %rcx ; X64-NEXT: imulq %r12, %rcx ; X64-NEXT: movq %r13, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: imulq %r13, %r15 +; X64-NEXT: addq %rcx, %r15 ; X64-NEXT: addq %rdx, %r15 ; X64-NEXT: movq %r13, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload -; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: imulq %rsi, %rcx +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: imulq %r13, %rbx +; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rax, %r11 ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: addq %rdx, %rcx -; X64-NEXT: imulq %r13, %rsi -; X64-NEXT: addq %rcx, %rsi -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X64-NEXT: addq %rdx, %rbx ; X64-NEXT: addq %rax, %r8 -; X64-NEXT: adcq %r15, %rsi -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: adcq %r15, %rbx +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %r9, %r15 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: adcq $0, %r13 -; X64-NEXT: addq %rcx, %r15 +; X64-NEXT: addq %r11, %r15 ; X64-NEXT: adcq %r9, %r13 ; X64-NEXT: setb %cl ; X64-NEXT: addq %rax, %r13 ; X64-NEXT: movzbl %cl, %r9d ; X64-NEXT: adcq %rdx, %r9 ; X64-NEXT: addq %r8, %r13 -; X64-NEXT: adcq %rsi, %r9 +; X64-NEXT: adcq %rbx, %r9 ; X64-NEXT: sarq $63, %r12 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: imulq %r12, %r8 -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: addq %rdx, %r8 +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: imulq %r12, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload ; X64-NEXT: movq %rdi, %rbx ; X64-NEXT: imulq %r12, %rbx -; X64-NEXT: addq %r8, %rbx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: imulq %r12, %rcx -; X64-NEXT: mulq %r12 -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rcx, %rdx +; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload +; X64-NEXT: movq %r8, %rdx +; X64-NEXT: imulq %r12, %rdx ; X64-NEXT: imulq %r12, %r10 ; X64-NEXT: addq %rdx, %r10 +; X64-NEXT: movq %r10, %rcx +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rdx, %rcx +; X64-NEXT: addq %r10, %rbx ; X64-NEXT: addq %rsi, %r8 -; X64-NEXT: adcq %rbx, %r10 +; X64-NEXT: adcq %rbx, %rcx ; X64-NEXT: movq %rsi, %rbx -; X64-NEXT: addq %r11, %rbx -; X64-NEXT: adcq $0, %r11 +; X64-NEXT: addq %r10, %rbx +; X64-NEXT: adcq $0, %r10 ; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %rax, %rbx -; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: setb %cl -; X64-NEXT: addq %rax, %r11 -; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: adcq %rdx, %r10 +; X64-NEXT: setb %r12b +; X64-NEXT: addq %rax, %r10 +; X64-NEXT: movzbl %r12b, %eax ; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %r8, %r11 -; X64-NEXT: adcq %r10, %rax -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Folded Reload +; X64-NEXT: addq %r8, %r10 +; X64-NEXT: adcq %rcx, %rax +; X64-NEXT: addq %r11, %rsi ; X64-NEXT: adcq %r15, %rbx -; X64-NEXT: adcq %r13, %r11 +; X64-NEXT: adcq %r13, %r10 ; X64-NEXT: adcq %r9, %rax ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Folded Reload -; X64-NEXT: adcq %r14, %r11 +; X64-NEXT: adcq %r14, %r10 ; X64-NEXT: adcq %rbp, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: sarq $63, %rcx -; X64-NEXT: xorq %rcx, %rax -; X64-NEXT: xorq %rcx, %rbx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: sarq $63, %rdx +; X64-NEXT: xorq %rdx, %rax +; X64-NEXT: xorq %rdx, %rbx ; X64-NEXT: orq %rax, %rbx -; X64-NEXT: xorq %rcx, %r11 -; X64-NEXT: xorq %rsi, %rcx -; X64-NEXT: orq %r11, %rcx -; X64-NEXT: orq %rbx, %rcx +; X64-NEXT: xorq %rdx, %r10 +; X64-NEXT: xorq %rsi, %rdx +; X64-NEXT: orq %r10, %rdx +; X64-NEXT: orq %rbx, %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax -; X64-NEXT: movq %rdx, 24(%rax) +; X64-NEXT: movq %rcx, 24(%rax) ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload ; X64-NEXT: movq %rcx, (%rax) ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload @@ -613,42 +609,43 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: pushl %esi ; X86-NEXT: .cfi_def_cfa_offset 20 -; X86-NEXT: subl $156, %esp -; X86-NEXT: .cfi_def_cfa_offset 176 +; X86-NEXT: subl $152, %esp +; X86-NEXT: .cfi_def_cfa_offset 172 ; X86-NEXT: .cfi_offset %esi, -20 ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %ecx, %ebp ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %edi, %eax +; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: adcl %edi, %ecx ; X86-NEXT: setb %bl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %esi ; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill ; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %eax, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: adcl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: mull %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill @@ -659,354 +656,353 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: addl %ecx, %ebp ; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edi, %ebx -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill +; X86-NEXT: adcl %edi, %esi +; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %ecx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %ebx, %ecx -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload +; X86-NEXT: mull %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %esi, %edi +; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %esi, (%esp) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %esi +; X86-NEXT: mull %ebx ; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %ebx, %esi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %esi, %ebx ; X86-NEXT: adcl $0, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %edi, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: adcl %ebp, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: adcl %ebp, %ecx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %edi, %ebp +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %ecx, %esi ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebx -; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %eax, %ebp +; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, %esi ; X86-NEXT: adcl $0, %ebp -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload -; X86-NEXT: adcl (%esp), %ebx ## 4-byte Folded Reload +; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload ; X86-NEXT: setb (%esp) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi -; X86-NEXT: adcl $0, %esi +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: mull %ecx ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: adcl %edi, %ecx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload ; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl %ebx, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl %ebp, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movzbl (%esp), %ecx ## 1-byte Folded Reload ; X86-NEXT: adcl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ecx -; X86-NEXT: setb %bl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %eax, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: addl %ecx, %ebx ; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edi, %ebp +; X86-NEXT: adcl %edi, %ecx ; X86-NEXT: setb %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ecx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: mull %esi +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %ebx, %esi -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %ecx, %ebp +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: adcl %ebp, %edi -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edx, %esi +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %edi, %esi +; X86-NEXT: setb %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %edi, %ebp -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebx -; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %esi, %edi +; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: adcl %eax, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %esi, %ebx ; X86-NEXT: adcl $0, %ebp -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: addl (%esp), %ebp ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: adcl %ebp, %ecx +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload +; X86-NEXT: adcl %eax, %ebp +; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl (%esp), %ebp ## 4-byte Folded Reload +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: mull %ecx ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %edi, %ecx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %ecx, %edi +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload +; X86-NEXT: adcl %eax, %ebx ; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload -; X86-NEXT: addl %ebp, %ecx -; X86-NEXT: movl %edi, %esi -; X86-NEXT: adcl %ebx, %esi -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 1-byte Folded Reload -; X86-NEXT: adcl %edi, %eax -; X86-NEXT: adcl $0, %edx +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload +; X86-NEXT: adcl %ebp, %edx +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload +; X86-NEXT: adcl %eax, %edi +; X86-NEXT: adcl $0, %ebx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %esi, %ebp +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %esi +; X86-NEXT: movl %esi, %edi ; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %edi, %eax -; X86-NEXT: mull %ecx -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %ebx, %eax +; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ecx -; X86-NEXT: setb %bl -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: adcl %ebx, %esi +; X86-NEXT: setb %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %edi, %ebp -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %esi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movzbl %bl, %eax +; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %ebx, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi -; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl %ebp, %edi +; X86-NEXT: setb %cl ; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ebp -; X86-NEXT: setb %bl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ecx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %ebp, %ecx -; X86-NEXT: movzbl %bl, %eax +; X86-NEXT: mull %esi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %edi, %esi +; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %edi, %ebx -; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movl %eax, %ebx -; X86-NEXT: adcl %ebp, %esi +; X86-NEXT: adcl %edi, %ebp ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %esi, %edi +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addl %ebp, %ecx ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %ebp -; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl %eax, %edi +; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, %ecx ; X86-NEXT: adcl $0, %edi -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: adcl $0, %esi +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %esi, %ebp +; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: mull %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: adcl %ebx, %esi ; X86-NEXT: setb %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movzbl %bl, %ecx -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movzbl %bl, %esi +; X86-NEXT: adcl %esi, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: addl %edi, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; X86-NEXT: adcl %ebp, %edi +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: adcl %edi, %ebp ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload ; X86-NEXT: adcl %ecx, %eax ; X86-NEXT: adcl $0, %edx @@ -1021,7 +1017,7 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movl %ebx, %edx ; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl %edi, %ecx +; X86-NEXT: movl %ebp, %ecx ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: adcl $0, %eax ; X86-NEXT: adcl $0, %esi @@ -1063,103 +1059,103 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: mull %ebx ; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %esi, %ebx -; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %edi, %ebp +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %ebx, %eax +; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %edi, %ecx +; X86-NEXT: adcl %ebx, %ecx ; X86-NEXT: setb %bl -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %ecx, %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %ecx, %edi ; X86-NEXT: movzbl %bl, %eax ; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %ebx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: mull %ebp ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %ecx, %ebp -; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: mull %ecx ; X86-NEXT: movl %edx, %ebx -; X86-NEXT: addl %ebp, %eax -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: adcl %edi, %ebx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: adcl %ebp, %ebx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %ebx, %ecx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %ebx, %ebp ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %edi -; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %eax, %ecx +; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: adcl $0, %edi -; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: addl (%esp), %ebp ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: mull %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %esi, %ebp -; X86-NEXT: adcl $0, %ebx +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %esi, %ebx +; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: mull %edx ; X86-NEXT: movl %edx, %esi -; X86-NEXT: addl %ebp, %eax -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: adcl %ebx, %esi +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %edi, %esi ; X86-NEXT: setb (%esp) ## 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: addl %esi, %ebx ; X86-NEXT: movzbl (%esp), %eax ## 1-byte Folded Reload -; X86-NEXT: movl %edx, %esi -; X86-NEXT: adcl %eax, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: adcl %edi, %ebp +; X86-NEXT: adcl %eax, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload +; X86-NEXT: addl %ebp, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; X86-NEXT: adcl %ecx, %esi ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload ; X86-NEXT: adcl %eax, %ebx -; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload ; X86-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload @@ -1169,14 +1165,14 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload ; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload -; X86-NEXT: adcl %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl $0, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl $0, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: sarl $31, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1187,89 +1183,88 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %esi, %ebp -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: addl %ecx, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %ebx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %esi, %ebx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: adcl $0, %ebp +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill +; X86-NEXT: adcl %esi, %ebp ; X86-NEXT: setb %cl -; X86-NEXT: addl %eax, %ebx +; X86-NEXT: addl %eax, %ebp ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %edx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %eax, %ebp +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %eax, %edx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: adcl $0, %ecx -; X86-NEXT: addl (%esp), %edx ## 4-byte Folded Reload +; X86-NEXT: addl %esi, %edx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: adcl $0, %esi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %eax, %ecx -; X86-NEXT: setb %al -; X86-NEXT: addl %ebp, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movzbl %al, %edx -; X86-NEXT: adcl %esi, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; X86-NEXT: adcl %eax, %esi +; X86-NEXT: setb %dl +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movzbl %dl, %edx +; X86-NEXT: adcl %ebx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: movl (%esp), %eax ## 4-byte Reload ; X86-NEXT: adcl %edx, %eax -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: adcl $0, %ebp +; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; X86-NEXT: adcl $0, %esi -; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: adcl $0, %edx -; X86-NEXT: addl %ebp, %ecx +; X86-NEXT: addl %ebx, %ecx ; X86-NEXT: adcl %esi, %edx ; X86-NEXT: setb %al ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: adcl (%esp), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: adcl %ebx, %eax +; X86-NEXT: adcl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: mull %ecx -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: imull %edi, %ecx -; X86-NEXT: addl %edx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: imull %edi, %esi ; X86-NEXT: addl %ecx, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl %eax, %edx +; X86-NEXT: imull %edi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: imull %edi, %ecx +; X86-NEXT: addl %edx, %ecx ; X86-NEXT: mull %edi ; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %edi, %ecx ; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; X86-NEXT: addl %ebp, %esi +; X86-NEXT: movl (%esp), %eax ## 4-byte Reload ; X86-NEXT: addl %eax, %ebx ; X86-NEXT: adcl %esi, %ecx ; X86-NEXT: movl %eax, %esi @@ -1287,26 +1282,23 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: addl %ebx, %ebp ; X86-NEXT: adcl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %edi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; X86-NEXT: addl %eax, %ecx -; X86-NEXT: movl %ecx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: imull %edi, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: imull %edi, %ecx ; X86-NEXT: addl %edx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: imull %edi, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: imull {{[0-9]+}}(%esp), %edi ; X86-NEXT: addl %edx, %edi -; X86-NEXT: movl (%esp), %edx ## 4-byte Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload +; X86-NEXT: addl %edx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; X86-NEXT: addl %edx, %esi +; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl %ecx, %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: addl %edx, %ecx ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload ; X86-NEXT: addl %ebx, %ecx @@ -1317,35 +1309,36 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: addl %ebx, %edx ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %eax, %ecx -; X86-NEXT: addl %esi, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: adcl %edi, %ecx ; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload +; X86-NEXT: addl %esi, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; X86-NEXT: adcl %ebp, %edx ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: movl %eax, %ebp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: addl %edx, %ecx ; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill @@ -1354,189 +1347,187 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: adcl %edx, %esi ; X86-NEXT: setb %bl ; X86-NEXT: addl %eax, %esi -; X86-NEXT: movzbl %bl, %ebx -; X86-NEXT: adcl %edx, %ebx -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movzbl %bl, %edi +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl %ebp, %eax ; X86-NEXT: addl %esi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: adcl %ebx, %eax +; X86-NEXT: adcl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %esi -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: addl %edx, %ebp -; X86-NEXT: adcl $0, %edi +; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %eax, %ebp -; X86-NEXT: adcl %edx, %edi +; X86-NEXT: adcl %edx, %ebx ; X86-NEXT: setb %cl -; X86-NEXT: addl %eax, %edi +; X86-NEXT: addl %eax, %ebx ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %edx, %eax ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill ; X86-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: movl %edi, %edx +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload ; X86-NEXT: adcl $0, %eax ; X86-NEXT: addl %esi, %edx -; X86-NEXT: adcl %ebx, %eax +; X86-NEXT: adcl %edi, %eax ; X86-NEXT: movl %eax, %esi ; X86-NEXT: setb %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: addl %ebx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload +; X86-NEXT: addl %edi, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl %ebp, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: adcl %edi, %eax +; X86-NEXT: adcl %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; X86-NEXT: imull %esi, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload -; X86-NEXT: imull %ebp, %eax -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %ebp, %eax +; X86-NEXT: imull %esi, %eax ; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: imull %esi, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %ebp, %ecx +; X86-NEXT: imull %esi, %ecx +; X86-NEXT: addl %edx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; X86-NEXT: addl %esi, %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %ebp, %ecx -; X86-NEXT: addl %edx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: addl %edx, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: addl %edx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload +; X86-NEXT: movl %edi, %eax ; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: adcl $0, %ebp ; X86-NEXT: addl %edx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl %esi, %edi -; X86-NEXT: setb %dl -; X86-NEXT: addl %ebx, %edi -; X86-NEXT: movzbl %dl, %eax +; X86-NEXT: adcl %esi, %ebp +; X86-NEXT: setb %al +; X86-NEXT: addl %edi, %ebp +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: adcl %ebx, %eax +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload ; X86-NEXT: adcl %ecx, %eax -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %ebp, %ecx -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload +; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: imull %ebp, %esi +; X86-NEXT: imull %ebx, %esi +; X86-NEXT: addl %ecx, %esi ; X86-NEXT: addl %edx, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: imull {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ebx, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebp -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: imull %eax, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: imull %ebx, %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: mull %ebx +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %edx, %esi +; X86-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %ebp, %eax -; X86-NEXT: addl %esi, %eax -; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ebp -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %ebx, %ebp -; X86-NEXT: movl %edx, %esi -; X86-NEXT: adcl $0, %esi -; X86-NEXT: addl %ecx, %ebp -; X86-NEXT: adcl %ebx, %esi -; X86-NEXT: setb %bl -; X86-NEXT: addl %eax, %esi -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %edx, %eax -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload -; X86-NEXT: adcl %edi, %esi +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addl %edi, %eax +; X86-NEXT: adcl $0, %edx +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: adcl %edi, %edx +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; X86-NEXT: addl %esi, %ebx ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl %ebp, %edx +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: addl (%esp), %edx ## 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: adcl (%esp), %ebx ## 4-byte Folded Reload +; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl (%esp), %ebx ## 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: xorl %esi, %edx +; X86-NEXT: orl %edi, %edx ; X86-NEXT: movl %ebx, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: xorl %edi, %edx -; X86-NEXT: xorl %edi, %esi -; X86-NEXT: orl %edx, %esi -; X86-NEXT: xorl %edi, %ecx -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: xorl %edi, %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: xorl %esi, %edi +; X86-NEXT: orl %edx, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; X86-NEXT: xorl %edi, %edx -; X86-NEXT: xorl %edi, %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: xorl %edi, %ebp -; X86-NEXT: orl %eax, %ebp -; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload -; X86-NEXT: orl %ebp, %edi -; X86-NEXT: orl %ecx, %edi +; X86-NEXT: xorl %esi, %edx +; X86-NEXT: orl %edi, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload +; X86-NEXT: xorl %esi, %ebx +; X86-NEXT: xorl %esi, %ecx +; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: xorl %esi, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; X86-NEXT: orl %eax, %esi +; X86-NEXT: orl %edx, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebx, 28(%eax) +; X86-NEXT: movl %ebp, 28(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload @@ -1552,7 +1543,7 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload ; X86-NEXT: movl %ecx, 24(%eax) ; X86-NEXT: setne %al -; X86-NEXT: addl $156, %esp +; X86-NEXT: addl $152, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/sse-regcall.ll b/llvm/test/CodeGen/X86/sse-regcall.ll index 0226052402cb8b..7a3e48572b8e2a 100644 --- a/llvm/test/CodeGen/X86/sse-regcall.ll +++ b/llvm/test/CodeGen/X86/sse-regcall.ll @@ -196,7 +196,7 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a ; WIN32: # %bb.0: ; WIN32-NEXT: pushl %ebp ; WIN32-NEXT: pushl %ebx -; WIN32-NEXT: subl $12, %esp +; WIN32-NEXT: subl $16, %esp ; WIN32-NEXT: movl %esi, (%esp) # 4-byte Spill ; WIN32-NEXT: movl %edi, %esi ; WIN32-NEXT: movl %edx, %ebx @@ -207,36 +207,37 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a ; WIN32-NEXT: subl %esi, %ebx ; WIN32-NEXT: movl %edi, %eax ; WIN32-NEXT: subl %ecx, %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; WIN32-NEXT: movl %ebp, %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: imull %eax, %ecx +; WIN32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi -; WIN32-NEXT: movl %esi, %eax -; WIN32-NEXT: subl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: imull %ebx, %eax -; WIN32-NEXT: addl %ecx, %eax +; WIN32-NEXT: movl %esi, %edx +; WIN32-NEXT: subl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: imull %ebx, %edx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; WIN32-NEXT: movl (%esp), %ebx # 4-byte Reload -; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ebx -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: subl %ebp, %ebx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, %ecx ; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: imull %ebx, %ecx -; WIN32-NEXT: addl %eax, %ecx +; WIN32-NEXT: addl %edx, %ecx ; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload -; WIN32-NEXT: addl {{[0-9]+}}(%esp), %ebp -; WIN32-NEXT: imull %ebp, %edi +; WIN32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: addl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: imull %edx, %edi ; WIN32-NEXT: addl {{[0-9]+}}(%esp), %esi ; WIN32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; WIN32-NEXT: addl %esi, %edi -; WIN32-NEXT: addl {{[0-9]+}}(%esp), %edx -; WIN32-NEXT: imull %eax, %edx -; WIN32-NEXT: addl %edx, %edi +; WIN32-NEXT: addl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: imull %ebp, %eax +; WIN32-NEXT: addl %esi, %eax +; WIN32-NEXT: addl %eax, %edi ; WIN32-NEXT: addl %ecx, %edi +; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; WIN32-NEXT: movl %edi, %eax -; WIN32-NEXT: addl $12, %esp +; WIN32-NEXT: addl $16, %esp ; WIN32-NEXT: popl %ebx ; WIN32-NEXT: popl %ebp ; WIN32-NEXT: retl @@ -270,18 +271,18 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a ; WIN64-NEXT: # kill: def $r11d killed $r11d killed $r11 ; WIN64-NEXT: subl %r12d, %r11d ; WIN64-NEXT: imull %edx, %r11d -; WIN64-NEXT: addl %r9d, %r11d ; WIN64-NEXT: leal (%r14,%r15), %edx -; WIN64-NEXT: movl %r14d, %r9d -; WIN64-NEXT: subl %r15d, %r9d -; WIN64-NEXT: imull %esi, %r9d -; WIN64-NEXT: addl %r11d, %r9d +; WIN64-NEXT: # kill: def $r14d killed $r14d killed $r14 +; WIN64-NEXT: subl %r15d, %r14d +; WIN64-NEXT: imull %esi, %r14d +; WIN64-NEXT: addl %r11d, %r14d ; WIN64-NEXT: addl %ecx, %eax ; WIN64-NEXT: imull %r8d, %eax ; WIN64-NEXT: imull %ebx, %r10d -; WIN64-NEXT: addl %r10d, %eax ; WIN64-NEXT: imull %edi, %edx +; WIN64-NEXT: addl %r10d, %edx ; WIN64-NEXT: addl %edx, %eax +; WIN64-NEXT: addl %r14d, %eax ; WIN64-NEXT: addl %r9d, %eax ; WIN64-NEXT: popq %rbx ; WIN64-NEXT: retq @@ -311,19 +312,19 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a ; LINUXOSX-NEXT: leal (%r13,%r14), %r11d ; LINUXOSX-NEXT: movl %r13d, %r12d ; LINUXOSX-NEXT: subl %r14d, %r12d +; LINUXOSX-NEXT: movl {{[0-9]+}}(%rsp), %r14d ; LINUXOSX-NEXT: imull %edx, %r12d -; LINUXOSX-NEXT: movl {{[0-9]+}}(%rsp), %edx -; LINUXOSX-NEXT: addl %r9d, %r12d -; LINUXOSX-NEXT: movl %r15d, %r9d -; LINUXOSX-NEXT: subl %edx, %r9d -; LINUXOSX-NEXT: imull %esi, %r9d -; LINUXOSX-NEXT: addl %r12d, %r9d +; LINUXOSX-NEXT: movl %r15d, %edx +; LINUXOSX-NEXT: subl %r14d, %edx +; LINUXOSX-NEXT: imull %esi, %edx +; LINUXOSX-NEXT: addl %r12d, %edx ; LINUXOSX-NEXT: addl %ecx, %eax ; LINUXOSX-NEXT: imull %r8d, %eax ; LINUXOSX-NEXT: imull %r10d, %r11d -; LINUXOSX-NEXT: addl %r11d, %eax -; LINUXOSX-NEXT: addl %r15d, %edx -; LINUXOSX-NEXT: imull %edi, %edx +; LINUXOSX-NEXT: addl %r15d, %r14d +; LINUXOSX-NEXT: imull %edi, %r14d +; LINUXOSX-NEXT: addl %r11d, %r14d +; LINUXOSX-NEXT: addl %r14d, %eax ; LINUXOSX-NEXT: addl %edx, %eax ; LINUXOSX-NEXT: addl %r9d, %eax ; LINUXOSX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll index b5b9ce95a46bac..833b13e796787e 100644 --- a/llvm/test/CodeGen/X86/stack-clash-large.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large.ll @@ -98,13 +98,13 @@ define void @push_before_probe(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i ; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888 ; CHECK-X64-NEXT: .cfi_offset %rax, -16 ; CHECK-X64-NEXT: movl 71888(%rsp), %eax -; CHECK-X64-NEXT: addl %esi, %edi ; CHECK-X64-NEXT: addl %ecx, %edx -; CHECK-X64-NEXT: addl %edi, %edx -; CHECK-X64-NEXT: addl %r9d, %r8d ; CHECK-X64-NEXT: addl 71896(%rsp), %eax +; CHECK-X64-NEXT: addl %esi, %edx +; CHECK-X64-NEXT: addl %r9d, %eax ; CHECK-X64-NEXT: addl %r8d, %eax ; CHECK-X64-NEXT: addl %edx, %eax +; CHECK-X64-NEXT: addl %edi, %eax ; CHECK-X64-NEXT: movl %eax, 264(%rsp) ; CHECK-X64-NEXT: movl %eax, 28664(%rsp) ; CHECK-X64-NEXT: addq $71872, %rsp # imm = 0x118C0 @@ -141,16 +141,16 @@ define void @push_before_probe(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i ; CHECK-X86-NEXT: .cfi_offset %edx, -12 ; CHECK-X86-NEXT: .cfi_offset %esi, -8 ; CHECK-X86-NEXT: movl 72056(%esp), %eax -; CHECK-X86-NEXT: movl 72048(%esp), %edx -; CHECK-X86-NEXT: movl 72040(%esp), %ecx +; CHECK-X86-NEXT: movl 72048(%esp), %ecx +; CHECK-X86-NEXT: movl 72040(%esp), %edx ; CHECK-X86-NEXT: movl 72032(%esp), %esi ; CHECK-X86-NEXT: addl 72036(%esp), %esi -; CHECK-X86-NEXT: addl 72044(%esp), %ecx -; CHECK-X86-NEXT: addl %esi, %ecx -; CHECK-X86-NEXT: addl 72052(%esp), %edx +; CHECK-X86-NEXT: addl 72044(%esp), %edx +; CHECK-X86-NEXT: addl 72052(%esp), %ecx ; CHECK-X86-NEXT: addl 72060(%esp), %eax -; CHECK-X86-NEXT: addl %edx, %eax ; CHECK-X86-NEXT: addl %ecx, %eax +; CHECK-X86-NEXT: addl %edx, %eax +; CHECK-X86-NEXT: addl %esi, %eax ; CHECK-X86-NEXT: movl %eax, 392(%esp) ; CHECK-X86-NEXT: movl %eax, 28792(%esp) ; CHECK-X86-NEXT: addl $72012, %esp # imm = 0x1194C @@ -184,13 +184,13 @@ define void @push_before_probe(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i ; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888 ; CHECK-X32-NEXT: .cfi_offset %rax, -16 ; CHECK-X32-NEXT: movl 71888(%esp), %eax -; CHECK-X32-NEXT: addl %esi, %edi ; CHECK-X32-NEXT: addl %ecx, %edx -; CHECK-X32-NEXT: addl %edi, %edx -; CHECK-X32-NEXT: addl %r9d, %r8d ; CHECK-X32-NEXT: addl 71896(%esp), %eax +; CHECK-X32-NEXT: addl %esi, %edx +; CHECK-X32-NEXT: addl %r9d, %eax ; CHECK-X32-NEXT: addl %r8d, %eax ; CHECK-X32-NEXT: addl %edx, %eax +; CHECK-X32-NEXT: addl %edi, %eax ; CHECK-X32-NEXT: movl %eax, 264(%esp) ; CHECK-X32-NEXT: movl %eax, 28664(%esp) ; CHECK-X32-NEXT: addl $71872, %esp # imm = 0x118C0 diff --git a/llvm/test/CodeGen/X86/statepoint-live-in.ll b/llvm/test/CodeGen/X86/statepoint-live-in.ll index 2b02656071a7bc..84ace4b5b09509 100644 --- a/llvm/test/CodeGen/X86/statepoint-live-in.ll +++ b/llvm/test/CodeGen/X86/statepoint-live-in.ll @@ -442,12 +442,12 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: movl %esi, %r15d +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movl %esi, %ebx ; CHECK-NEXT: movl %edx, %r12d ; CHECK-NEXT: movl %ecx, %r13d -; CHECK-NEXT: movl %r8d, %ebp -; CHECK-NEXT: movl %r9d, %r14d +; CHECK-NEXT: movl %r8d, %r14d +; CHECK-NEXT: movl %r9d, %r15d ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -472,11 +472,10 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: callq _bar ## 160-byte Folded Reload ; CHECK-NEXT: Ltmp13: -; CHECK-NEXT: addq %r15, %rbx ; CHECK-NEXT: addq %r12, %rbx ; CHECK-NEXT: addq %r13, %rbx -; CHECK-NEXT: addq %rbp, %rbx ; CHECK-NEXT: addq %r14, %rbx +; CHECK-NEXT: addq %r15, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -517,6 +516,7 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: addq %rax, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: addq %rbp, %rbx ; CHECK-NEXT: movq %rbx, %rax ; CHECK-NEXT: addq $168, %rsp ; CHECK-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/statepoint-regs.ll b/llvm/test/CodeGen/X86/statepoint-regs.ll index 9338da5de80016..d6fef28bc8c66c 100644 --- a/llvm/test/CodeGen/X86/statepoint-regs.ll +++ b/llvm/test/CodeGen/X86/statepoint-regs.ll @@ -554,12 +554,12 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: movl %esi, %r15d +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movl %esi, %ebx ; CHECK-NEXT: movl %edx, %r12d ; CHECK-NEXT: movl %ecx, %r13d -; CHECK-NEXT: movl %r8d, %ebp -; CHECK-NEXT: movl %r9d, %r14d +; CHECK-NEXT: movl %r8d, %r14d +; CHECK-NEXT: movl %r9d, %r15d ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -584,11 +584,10 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: callq _bar ## 160-byte Folded Reload ; CHECK-NEXT: Ltmp14: -; CHECK-NEXT: addq %r15, %rbx ; CHECK-NEXT: addq %r12, %rbx ; CHECK-NEXT: addq %r13, %rbx -; CHECK-NEXT: addq %rbp, %rbx ; CHECK-NEXT: addq %r14, %rbx +; CHECK-NEXT: addq %r15, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax @@ -629,6 +628,7 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; CHECK-NEXT: addq %rax, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: addq %rbp, %rbx ; CHECK-NEXT: movq %rbx, %rax ; CHECK-NEXT: addq $168, %rsp ; CHECK-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll index d29548d488e63c..24c1509b2325e9 100644 --- a/llvm/test/CodeGen/X86/swift-return.ll +++ b/llvm/test/CodeGen/X86/swift-return.ll @@ -147,9 +147,11 @@ define dso_local i32 @test3(i32 %key) #0 { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq gen3@PLT -; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-NEXT: # kill: def $ecx killed $ecx def $rcx +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: addl %r8d, %ecx ; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: addl %r8d, %eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -358,9 +360,9 @@ define swiftcc { double, i64 } @test6() #0 { ; CHECK-NEXT: addsd %xmm1, %xmm0 ; CHECK-NEXT: addsd %xmm2, %xmm0 ; CHECK-NEXT: addsd %xmm3, %xmm0 -; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: addq %rdx, %rcx +; CHECK-NEXT: addq %r8, %rcx ; CHECK-NEXT: addq %rcx, %rax -; CHECK-NEXT: addq %r8, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/twoaddr-lea.ll b/llvm/test/CodeGen/X86/twoaddr-lea.ll index 14186537daea5b..b11c9de00886f2 100644 --- a/llvm/test/CodeGen/X86/twoaddr-lea.ll +++ b/llvm/test/CodeGen/X86/twoaddr-lea.ll @@ -31,11 +31,13 @@ define i32 @test1(i32 %X) nounwind { define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: test2: ; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: ## kill: def $ecx killed $ecx def $rcx +; CHECK-NEXT: ## kill: def $edx killed $edx def $rdx ; CHECK-NEXT: ## kill: def $esi killed $esi def $rsi ; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal (%rdi,%rsi), %eax -; CHECK-NEXT: addl %edx, %eax -; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: leal (%rdx,%rcx), %eax +; CHECK-NEXT: addl %esi, %eax ; CHECK-NEXT: retq entry: %add = add i32 %b, %a diff --git a/llvm/test/CodeGen/X86/umul-with-overflow.ll b/llvm/test/CodeGen/X86/umul-with-overflow.ll index b516d69c676df7..fcebba2e90fef7 100644 --- a/llvm/test/CodeGen/X86/umul-with-overflow.ll +++ b/llvm/test/CodeGen/X86/umul-with-overflow.ll @@ -93,7 +93,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: movl %ebx, %eax ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %edi @@ -106,7 +106,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: movl %ecx, %ebx ; X86-NEXT: movl %edx, %esi ; X86-NEXT: addl %ebp, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl %edi, %esi ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -142,12 +142,12 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: addl %ebx, %esi ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: adcl %eax, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: adcl (%esp), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-NEXT: addl (%esp), %esi # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %ebp, %eax @@ -176,13 +176,13 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X86-NEXT: adcl %eax, %ecx ; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: adcl (%esp), %edi # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %ebx ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload +; X86-NEXT: setb (%esp) # 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: mull %edi @@ -201,123 +201,120 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: adcl %edi, %esi -; X86-NEXT: setb (%esp) # 1-byte Folded Spill +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %esi, %eax -; X86-NEXT: movzbl (%esp), %esi # 1-byte Folded Reload +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X86-NEXT: adcl %esi, %edx ; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: adcl %ecx, %ebp ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload ; X86-NEXT: adcl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %ecx, %edi ; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %esi, %ecx -; X86-NEXT: setb %bl -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %edi -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movzbl %bl, %eax -; X86-NEXT: adcl %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: addl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: adcl %esi, %ebx +; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %ebp -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %edi, %ebp -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %ebx, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: adcl %eax, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: mull %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %esi, %ecx -; X86-NEXT: setb %bl -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull %edi -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: movzbl %bl, %ecx -; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %esi, %ebp +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %esi +; X86-NEXT: addl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ebx, %esi +; X86-NEXT: setb %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: movzbl %bl, %eax +; X86-NEXT: adcl %eax, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, (%esp) # 4-byte Folded Spill -; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: adcl $0, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ecx, %edi +; X86-NEXT: addl %ebx, %edi ; X86-NEXT: adcl $0, %esi ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: mull %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: mull %ebx ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: addl %edi, %eax ; X86-NEXT: movl %eax, %edi ; X86-NEXT: adcl %esi, %ebp ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ecx +; X86-NEXT: mull %ebx ; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, %esi ; X86-NEXT: addl %ebp, %esi ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X86-NEXT: adcl %eax, %ebx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: addl %ecx, %ebp ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %esi ; X86-NEXT: adcl $0, %ebx ; X86-NEXT: addl (%esp), %esi # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: imull {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: imull %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull %edx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: imull {{[0-9]+}}(%esp), %edi +; X86-NEXT: addl %edx, %edi ; X86-NEXT: addl %ecx, %edi ; X86-NEXT: movl %eax, %edx ; X86-NEXT: addl %esi, %edx @@ -368,7 +365,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %ebx @@ -380,7 +377,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %ebp, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl %ebx, %edi ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -428,23 +425,20 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: adcl $0, %edi ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: imull {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: imull %ecx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %eax ; X86-NEXT: mull %ecx ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %edx, %ebp -; X86-NEXT: imull {{[0-9]+}}(%esp), %ebx +; X86-NEXT: addl %edx, %ebx ; X86-NEXT: addl %ebp, %ebx ; X86-NEXT: addl %esi, %ecx ; X86-NEXT: adcl %edi, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload @@ -452,41 +446,44 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: adcl $0, %ebx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: imull {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: imull %edx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %edx ; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %edx, %ebp -; X86-NEXT: imull {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %edx, %esi ; X86-NEXT: addl %ebp, %esi ; X86-NEXT: addl %ecx, %edi ; X86-NEXT: adcl %ebx, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: imull {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: imull %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %edx ; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: imull {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %edx, %esi ; X86-NEXT: addl %ecx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: imull %edx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %edx -; X86-NEXT: addl %edx, %ebp -; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx +; X86-NEXT: addl %edx, %ecx ; X86-NEXT: addl %ebp, %ecx ; X86-NEXT: addl %ebx, %eax ; X86-NEXT: adcl %esi, %ecx @@ -501,9 +498,9 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: movl %esi, 8(%edx) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, 12(%edx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, 16(%edx) ; X86-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-NEXT: movl %esi, 16(%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, 20(%edx) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, 24(%edx) @@ -530,16 +527,16 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: pushq %rbx ; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r8, %r11 -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %r8, %rbp +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r10, %rbp ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: addq %rbx, %r15 @@ -551,13 +548,13 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: addq %r15, %rbx ; X64-NEXT: adcq %r14, %r12 ; X64-NEXT: setb %al -; X64-NEXT: movzbl %al, %r8d -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movzbl %al, %r10d +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %r15 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: addq %r12, %r13 -; X64-NEXT: adcq %r8, %r15 +; X64-NEXT: adcq %r10, %r15 ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %r12 @@ -565,13 +562,13 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %r12, %r8 +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: addq %r12, %r10 ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: addq %r8, %r12 +; X64-NEXT: addq %r10, %r12 ; X64-NEXT: adcq %rbp, %rdx ; X64-NEXT: imulq %r9, %r11 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 @@ -580,30 +577,30 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: adcq %rdx, %r11 ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r15 -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %r8, %rbp +; X64-NEXT: addq %r10, %rbp ; X64-NEXT: adcq $0, %r13 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %r10 ; X64-NEXT: addq %rbp, %rax ; X64-NEXT: adcq %r13, %rdx -; X64-NEXT: imulq %r8, %r10 -; X64-NEXT: addq %rdx, %r10 +; X64-NEXT: imulq %r10, %r8 +; X64-NEXT: addq %rdx, %r8 ; X64-NEXT: addq %r14, %r15 ; X64-NEXT: adcq %r12, %rax -; X64-NEXT: adcq %r11, %r10 +; X64-NEXT: adcq %r11, %r8 ; X64-NEXT: imulq %r9, %rcx -; X64-NEXT: addq %r10, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rdx ; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rsi ; X64-NEXT: addq %rdx, %rsi +; X64-NEXT: addq %r8, %rsi ; X64-NEXT: addq %rcx, %rsi ; X64-NEXT: movq %rbx, 8(%rdi) ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload diff --git a/llvm/test/CodeGen/X86/umul_fix.ll b/llvm/test/CodeGen/X86/umul_fix.ll index cb4bdd1ede75c9..dca010064b5872 100644 --- a/llvm/test/CodeGen/X86/umul_fix.ll +++ b/llvm/test/CodeGen/X86/umul_fix.ll @@ -185,8 +185,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %esi ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl %ecx, %edx ; X86-NEXT: imull {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %ecx, %esi ; X86-NEXT: addl %esi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll index 671d7c21013dea..7a5198dd6a0faf 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll @@ -295,19 +295,21 @@ define i1 @t8_3_2(i8 %X) nounwind { define i1 @t64_3_2(i64 %X) nounwind { ; X86-LABEL: t64_3_2: ; X86: # %bb.0: +; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %edx ; X86-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %edx # imm = 0xAAAAAAAB -; X86-NEXT: addl %ecx, %edx +; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %esi # imm = 0xAAAAAAAB +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %edx, %esi ; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA -; X86-NEXT: adcl $-1431655766, %edx # imm = 0xAAAAAAAA +; X86-NEXT: adcl $-1431655766, %esi # imm = 0xAAAAAAAA ; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555 -; X86-NEXT: sbbl $1431655765, %edx # imm = 0x55555555 +; X86-NEXT: sbbl $1431655765, %esi # imm = 0x55555555 ; X86-NEXT: setb %al +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: t64_3_2: diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll index dbec86755a969d..7bb7c9b481d39b 100644 --- a/llvm/test/CodeGen/X86/vec_smulo.ll +++ b/llvm/test/CodeGen/X86/vec_smulo.ll @@ -3309,8 +3309,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE2-NEXT: movq %r14, %rax ; SSE2-NEXT: mulq %r12 ; SSE2-NEXT: movq %rax, %rdi -; SSE2-NEXT: addq %rbx, %rdx ; SSE2-NEXT: imulq %r9, %r12 +; SSE2-NEXT: addq %rbx, %r12 ; SSE2-NEXT: addq %rdx, %r12 ; SSE2-NEXT: movq %r9, %rbx ; SSE2-NEXT: sarq $63, %rbx @@ -3319,8 +3319,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE2-NEXT: movq %rbx, %rax ; SSE2-NEXT: mulq %r10 ; SSE2-NEXT: movq %rax, %r15 -; SSE2-NEXT: addq %r13, %rdx ; SSE2-NEXT: imulq %r10, %rbx +; SSE2-NEXT: addq %r13, %rbx ; SSE2-NEXT: addq %rdx, %rbx ; SSE2-NEXT: addq %rdi, %r15 ; SSE2-NEXT: adcq %r12, %rbx @@ -3363,8 +3363,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE2-NEXT: movq %rsi, %rax ; SSE2-NEXT: mulq %rbx ; SSE2-NEXT: movq %rax, %r9 -; SSE2-NEXT: addq %r10, %rdx ; SSE2-NEXT: imulq %rbp, %rbx +; SSE2-NEXT: addq %r10, %rbx ; SSE2-NEXT: addq %rdx, %rbx ; SSE2-NEXT: movq %rbp, %r10 ; SSE2-NEXT: sarq $63, %r10 @@ -3373,8 +3373,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE2-NEXT: movq %r10, %rax ; SSE2-NEXT: mulq %r8 ; SSE2-NEXT: movq %rax, %r11 -; SSE2-NEXT: addq %r14, %rdx ; SSE2-NEXT: imulq %r8, %r10 +; SSE2-NEXT: addq %r14, %r10 ; SSE2-NEXT: addq %rdx, %r10 ; SSE2-NEXT: addq %r9, %r11 ; SSE2-NEXT: adcq %rbx, %r10 @@ -3445,8 +3445,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSSE3-NEXT: movq %r14, %rax ; SSSE3-NEXT: mulq %r12 ; SSSE3-NEXT: movq %rax, %rdi -; SSSE3-NEXT: addq %rbx, %rdx ; SSSE3-NEXT: imulq %r9, %r12 +; SSSE3-NEXT: addq %rbx, %r12 ; SSSE3-NEXT: addq %rdx, %r12 ; SSSE3-NEXT: movq %r9, %rbx ; SSSE3-NEXT: sarq $63, %rbx @@ -3455,8 +3455,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSSE3-NEXT: movq %rbx, %rax ; SSSE3-NEXT: mulq %r10 ; SSSE3-NEXT: movq %rax, %r15 -; SSSE3-NEXT: addq %r13, %rdx ; SSSE3-NEXT: imulq %r10, %rbx +; SSSE3-NEXT: addq %r13, %rbx ; SSSE3-NEXT: addq %rdx, %rbx ; SSSE3-NEXT: addq %rdi, %r15 ; SSSE3-NEXT: adcq %r12, %rbx @@ -3499,8 +3499,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSSE3-NEXT: movq %rsi, %rax ; SSSE3-NEXT: mulq %rbx ; SSSE3-NEXT: movq %rax, %r9 -; SSSE3-NEXT: addq %r10, %rdx ; SSSE3-NEXT: imulq %rbp, %rbx +; SSSE3-NEXT: addq %r10, %rbx ; SSSE3-NEXT: addq %rdx, %rbx ; SSSE3-NEXT: movq %rbp, %r10 ; SSSE3-NEXT: sarq $63, %r10 @@ -3509,8 +3509,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSSE3-NEXT: movq %r10, %rax ; SSSE3-NEXT: mulq %r8 ; SSSE3-NEXT: movq %rax, %r11 -; SSSE3-NEXT: addq %r14, %rdx ; SSSE3-NEXT: imulq %r8, %r10 +; SSSE3-NEXT: addq %r14, %r10 ; SSSE3-NEXT: addq %rdx, %r10 ; SSSE3-NEXT: addq %r9, %r11 ; SSSE3-NEXT: adcq %rbx, %r10 @@ -3581,8 +3581,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE41-NEXT: movq %r14, %rax ; SSE41-NEXT: mulq %r12 ; SSE41-NEXT: movq %rax, %rdi -; SSE41-NEXT: addq %rbx, %rdx ; SSE41-NEXT: imulq %r9, %r12 +; SSE41-NEXT: addq %rbx, %r12 ; SSE41-NEXT: addq %rdx, %r12 ; SSE41-NEXT: movq %r9, %rbx ; SSE41-NEXT: sarq $63, %rbx @@ -3591,8 +3591,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE41-NEXT: movq %rbx, %rax ; SSE41-NEXT: mulq %r10 ; SSE41-NEXT: movq %rax, %r15 -; SSE41-NEXT: addq %r13, %rdx ; SSE41-NEXT: imulq %r10, %rbx +; SSE41-NEXT: addq %r13, %rbx ; SSE41-NEXT: addq %rdx, %rbx ; SSE41-NEXT: addq %rdi, %r15 ; SSE41-NEXT: adcq %r12, %rbx @@ -3635,8 +3635,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE41-NEXT: movq %rsi, %rax ; SSE41-NEXT: mulq %rbx ; SSE41-NEXT: movq %rax, %r9 -; SSE41-NEXT: addq %r10, %rdx ; SSE41-NEXT: imulq %rbp, %rbx +; SSE41-NEXT: addq %r10, %rbx ; SSE41-NEXT: addq %rdx, %rbx ; SSE41-NEXT: movq %rbp, %r10 ; SSE41-NEXT: sarq $63, %r10 @@ -3645,8 +3645,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; SSE41-NEXT: movq %r10, %rax ; SSE41-NEXT: mulq %r8 ; SSE41-NEXT: movq %rax, %r11 -; SSE41-NEXT: addq %r14, %rdx ; SSE41-NEXT: imulq %r8, %r10 +; SSE41-NEXT: addq %r14, %r10 ; SSE41-NEXT: addq %rdx, %r10 ; SSE41-NEXT: addq %r9, %r11 ; SSE41-NEXT: adcq %rbx, %r10 @@ -3716,8 +3716,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX-NEXT: movq %r14, %rax ; AVX-NEXT: mulq %r12 ; AVX-NEXT: movq %rax, %rdi -; AVX-NEXT: addq %rbx, %rdx ; AVX-NEXT: imulq %r9, %r12 +; AVX-NEXT: addq %rbx, %r12 ; AVX-NEXT: addq %rdx, %r12 ; AVX-NEXT: movq %r9, %rbx ; AVX-NEXT: sarq $63, %rbx @@ -3726,8 +3726,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX-NEXT: movq %rbx, %rax ; AVX-NEXT: mulq %r10 ; AVX-NEXT: movq %rax, %r15 -; AVX-NEXT: addq %r13, %rdx ; AVX-NEXT: imulq %r10, %rbx +; AVX-NEXT: addq %r13, %rbx ; AVX-NEXT: addq %rdx, %rbx ; AVX-NEXT: addq %rdi, %r15 ; AVX-NEXT: adcq %r12, %rbx @@ -3770,8 +3770,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX-NEXT: movq %rsi, %rax ; AVX-NEXT: mulq %rbx ; AVX-NEXT: movq %rax, %r9 -; AVX-NEXT: addq %r10, %rdx ; AVX-NEXT: imulq %rbp, %rbx +; AVX-NEXT: addq %r10, %rbx ; AVX-NEXT: addq %rdx, %rbx ; AVX-NEXT: movq %rbp, %r10 ; AVX-NEXT: sarq $63, %r10 @@ -3780,8 +3780,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX-NEXT: movq %r10, %rax ; AVX-NEXT: mulq %r8 ; AVX-NEXT: movq %rax, %r11 -; AVX-NEXT: addq %r14, %rdx ; AVX-NEXT: imulq %r8, %r10 +; AVX-NEXT: addq %r14, %r10 ; AVX-NEXT: addq %rdx, %r10 ; AVX-NEXT: addq %r9, %r11 ; AVX-NEXT: adcq %rbx, %r10 @@ -3851,8 +3851,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512F-NEXT: movq %r15, %rax ; AVX512F-NEXT: mulq %r12 ; AVX512F-NEXT: movq %rax, %rcx -; AVX512F-NEXT: addq %rbx, %rdx ; AVX512F-NEXT: imulq %rsi, %r12 +; AVX512F-NEXT: addq %rbx, %r12 ; AVX512F-NEXT: addq %rdx, %r12 ; AVX512F-NEXT: movq %rsi, %rbx ; AVX512F-NEXT: sarq $63, %rbx @@ -3861,8 +3861,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512F-NEXT: movq %rbx, %rax ; AVX512F-NEXT: mulq %r10 ; AVX512F-NEXT: movq %rax, %r14 -; AVX512F-NEXT: addq %r13, %rdx ; AVX512F-NEXT: imulq %r10, %rbx +; AVX512F-NEXT: addq %r13, %rbx ; AVX512F-NEXT: addq %rdx, %rbx ; AVX512F-NEXT: addq %rcx, %r14 ; AVX512F-NEXT: adcq %r12, %rbx @@ -3905,8 +3905,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512F-NEXT: movq %r8, %rax ; AVX512F-NEXT: mulq %rsi ; AVX512F-NEXT: movq %rax, %r10 -; AVX512F-NEXT: addq %r11, %rdx ; AVX512F-NEXT: imulq %rbp, %rsi +; AVX512F-NEXT: addq %r11, %rsi ; AVX512F-NEXT: addq %rdx, %rsi ; AVX512F-NEXT: movq %rbp, %r11 ; AVX512F-NEXT: sarq $63, %r11 @@ -3915,8 +3915,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512F-NEXT: movq %r11, %rax ; AVX512F-NEXT: mulq %rdi ; AVX512F-NEXT: movq %rax, %rbx -; AVX512F-NEXT: addq %r14, %rdx ; AVX512F-NEXT: imulq %rdi, %r11 +; AVX512F-NEXT: addq %r14, %r11 ; AVX512F-NEXT: addq %rdx, %r11 ; AVX512F-NEXT: addq %r10, %rbx ; AVX512F-NEXT: adcq %rsi, %r11 @@ -3987,8 +3987,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512BW-NEXT: movq %r15, %rax ; AVX512BW-NEXT: mulq %r12 ; AVX512BW-NEXT: movq %rax, %rcx -; AVX512BW-NEXT: addq %rbx, %rdx ; AVX512BW-NEXT: imulq %rsi, %r12 +; AVX512BW-NEXT: addq %rbx, %r12 ; AVX512BW-NEXT: addq %rdx, %r12 ; AVX512BW-NEXT: movq %rsi, %rbx ; AVX512BW-NEXT: sarq $63, %rbx @@ -3997,8 +3997,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512BW-NEXT: movq %rbx, %rax ; AVX512BW-NEXT: mulq %r10 ; AVX512BW-NEXT: movq %rax, %r14 -; AVX512BW-NEXT: addq %r13, %rdx ; AVX512BW-NEXT: imulq %r10, %rbx +; AVX512BW-NEXT: addq %r13, %rbx ; AVX512BW-NEXT: addq %rdx, %rbx ; AVX512BW-NEXT: addq %rcx, %r14 ; AVX512BW-NEXT: adcq %r12, %rbx @@ -4041,8 +4041,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512BW-NEXT: movq %r8, %rax ; AVX512BW-NEXT: mulq %rsi ; AVX512BW-NEXT: movq %rax, %r10 -; AVX512BW-NEXT: addq %r11, %rdx ; AVX512BW-NEXT: imulq %rbp, %rsi +; AVX512BW-NEXT: addq %r11, %rsi ; AVX512BW-NEXT: addq %rdx, %rsi ; AVX512BW-NEXT: movq %rbp, %r11 ; AVX512BW-NEXT: sarq $63, %r11 @@ -4051,8 +4051,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind ; AVX512BW-NEXT: movq %r11, %rax ; AVX512BW-NEXT: mulq %rdi ; AVX512BW-NEXT: movq %rax, %rbx -; AVX512BW-NEXT: addq %r14, %rdx ; AVX512BW-NEXT: imulq %rdi, %r11 +; AVX512BW-NEXT: addq %r14, %r11 ; AVX512BW-NEXT: addq %rdx, %r11 ; AVX512BW-NEXT: addq %r10, %rbx ; AVX512BW-NEXT: adcq %rsi, %r11 diff --git a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll index 7ddff59c26b0ea..943424171d6622 100644 --- a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll +++ b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll @@ -46,16 +46,16 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1 ; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload ; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload ; X86-NEXT: kmovw %k0, %edi -; X86-NEXT: addl %edi, %eax -; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 # 2-byte Reload -; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 # 2-byte Reload -; X86-NEXT: kmovw %k2, %edi -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: kmovw %k1, %ecx -; X86-NEXT: addl %edi, %ecx -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: kmovw %k1, %eax ; X86-NEXT: addl %edx, %eax -; X86-NEXT: movw %ax, (%esi) +; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload +; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload +; X86-NEXT: kmovw %k0, %edx +; X86-NEXT: addl %eax, %edx +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %edi, %edx +; X86-NEXT: movw %dx, (%esi) ; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -104,11 +104,11 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1 ; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; X64-NEXT: kmovw %k0, %edi ; X64-NEXT: kmovw %k1, %r8d -; X64-NEXT: addl %edi, %eax ; X64-NEXT: addl %ecx, %edx ; X64-NEXT: addl %r8d, %eax ; X64-NEXT: addl %esi, %eax ; X64-NEXT: addl %edx, %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: movw %ax, (%rbx) ; X64-NEXT: leaq -8(%rbp), %rsp ; X64-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/win-smallparams.ll b/llvm/test/CodeGen/X86/win-smallparams.ll index 191f1642b10dc3..63ac6e0582adde 100644 --- a/llvm/test/CodeGen/X86/win-smallparams.ll +++ b/llvm/test/CodeGen/X86/win-smallparams.ll @@ -57,14 +57,13 @@ entry: ; WIN32: calll _manyargs ; WIN32-LABEL: _manyargs: -; WIN32: pushl %ebx ; WIN32: pushl %edi ; WIN32: pushl %esi -; WIN32-DAG: movsbl 16(%esp), -; WIN32-DAG: movswl 20(%esp), -; WIN32-DAG: movzbl 24(%esp), -; WIN32-DAG: movzwl 28(%esp), -; WIN32-DAG: movzbl 32(%esp), -; WIN32-DAG: movzwl 36(%esp), +; WIN32-DAG: movzwl 32(%esp), +; WIN32-DAG: movzbl 28(%esp), +; WIN32-DAG: movzwl 24(%esp), +; WIN32-DAG: movzbl 20(%esp), +; WIN32-DAG: movswl 16(%esp), +; WIN32-DAG: movsbl 12(%esp), ; WIN32: retl diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll index 9d55df0602f99a..bfb402c7cf0c06 100644 --- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll +++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll @@ -766,8 +766,8 @@ define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: movl %esi, %r8d ; CHECK-NEXT: .LBB9_2: # %entry -; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %r8d, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: retq ; ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group: @@ -781,8 +781,8 @@ define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 ; CHECK-FORCEALL-NEXT: movl %edi, %eax ; CHECK-FORCEALL-NEXT: movl %esi, %r8d ; CHECK-FORCEALL-NEXT: .LBB9_2: # %entry -; CHECK-FORCEALL-NEXT: addl %edx, %eax ; CHECK-FORCEALL-NEXT: addl %r8d, %eax +; CHECK-FORCEALL-NEXT: addl %edx, %eax ; CHECK-FORCEALL-NEXT: retq entry: %cond = icmp ugt i32 %a, %b @@ -808,8 +808,8 @@ define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, ptr %y.ptr) # ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: movl %esi, %r8d ; CHECK-NEXT: .LBB10_2: # %entry -; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %r8d, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: retq ; ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group2: @@ -823,8 +823,8 @@ define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, ptr %y.ptr) # ; CHECK-FORCEALL-NEXT: movl %edi, %eax ; CHECK-FORCEALL-NEXT: movl %esi, %r8d ; CHECK-FORCEALL-NEXT: .LBB10_2: # %entry -; CHECK-FORCEALL-NEXT: addl %edx, %eax ; CHECK-FORCEALL-NEXT: addl %r8d, %eax +; CHECK-FORCEALL-NEXT: addl %edx, %eax ; CHECK-FORCEALL-NEXT: retq entry: %cond = icmp ugt i32 %a, %b diff --git a/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll b/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll index 455f0c706d70ef..263ff329163c5c 100644 --- a/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll +++ b/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll @@ -38,8 +38,8 @@ define x86_64_sysvcc float @foo(i32 %a0, i32 %a1, float %b0) { ; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movl %edi, %edx ; CHECK-NEXT: callq bar@PLT -; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsi2ss %eax, %xmm0 ; CHECK-NEXT: addss %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll index 4adc80b3b8bd66..8d68303300ec64 100644 --- a/llvm/test/CodeGen/X86/xmulo.ll +++ b/llvm/test/CodeGen/X86/xmulo.ll @@ -213,53 +213,52 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi ; WIN32-NEXT: subl $8, %esp -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: movl %ecx, %edi -; WIN32-NEXT: sarl $31, %edi -; WIN32-NEXT: movl %eax, %esi -; WIN32-NEXT: imull %edi, %esi -; WIN32-NEXT: mull %edi -; WIN32-NEXT: movl %eax, %ebx -; WIN32-NEXT: addl %esi, %edx +; WIN32-NEXT: movl %ecx, %ebx +; WIN32-NEXT: sarl $31, %ebx ; WIN32-NEXT: movl %ebp, %esi -; WIN32-NEXT: imull %ebp, %edi -; WIN32-NEXT: addl %edx, %edi +; WIN32-NEXT: imull %ebx, %esi +; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: mull %ebx +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: imull %eax, %ebx +; WIN32-NEXT: addl %esi, %ebx +; WIN32-NEXT: addl %edx, %ebx +; WIN32-NEXT: movl %eax, %esi ; WIN32-NEXT: sarl $31, %esi -; WIN32-NEXT: movl %esi, %ebp -; WIN32-NEXT: imull %ecx, %ebp +; WIN32-NEXT: movl %esi, %edx +; WIN32-NEXT: imull %ecx, %edx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: movl %esi, %eax -; WIN32-NEXT: mull %ecx -; WIN32-NEXT: addl %ebp, %edx ; WIN32-NEXT: imull %ecx, %esi ; WIN32-NEXT: addl %edx, %esi -; WIN32-NEXT: addl %ebx, %eax +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: addl %edi, %eax ; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill -; WIN32-NEXT: adcl %edi, %esi +; WIN32-NEXT: adcl %ebx, %esi ; WIN32-NEXT: movl %ecx, %eax -; WIN32-NEXT: movl %ecx, %edi -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: mull %ecx -; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: mull %ebp +; WIN32-NEXT: movl %edx, %ebx ; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: mull %ecx -; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: mull %ebp +; WIN32-NEXT: movl %edx, %edi ; WIN32-NEXT: movl %eax, %ecx -; WIN32-NEXT: addl %ebp, %ecx -; WIN32-NEXT: adcl $0, %ebx -; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: addl %ebx, %ecx +; WIN32-NEXT: adcl $0, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: movl %edx, %edi +; WIN32-NEXT: movl %edx, %ebx ; WIN32-NEXT: movl %eax, %ebp ; WIN32-NEXT: addl %ecx, %ebp -; WIN32-NEXT: adcl %ebx, %edi +; WIN32-NEXT: adcl %edi, %ebx ; WIN32-NEXT: setb %cl ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: addl %edi, %eax +; WIN32-NEXT: addl %ebx, %eax ; WIN32-NEXT: movzbl %cl, %ecx ; WIN32-NEXT: adcl %ecx, %edx ; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload @@ -575,29 +574,26 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) { ; WIN32-NEXT: pushl %eax ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl %eax, %ecx -; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: sarl $31, %ecx ; WIN32-NEXT: movl %ebp, %edi ; WIN32-NEXT: imull %ecx, %edi ; WIN32-NEXT: movl %ebp, %eax ; WIN32-NEXT: mull %ecx -; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill -; WIN32-NEXT: addl %edi, %edx +; WIN32-NEXT: movl %eax, %esi ; WIN32-NEXT: imull %ebx, %ecx +; WIN32-NEXT: addl %edi, %ecx ; WIN32-NEXT: addl %edx, %ecx ; WIN32-NEXT: sarl $31, %ebx -; WIN32-NEXT: movl %ebx, %edi -; WIN32-NEXT: imull %esi, %edi -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: movl %ebx, %edx +; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi ; WIN32-NEXT: movl %ebx, %eax -; WIN32-NEXT: mull %esi -; WIN32-NEXT: addl %edi, %edx -; WIN32-NEXT: movl %esi, %edi -; WIN32-NEXT: imull %esi, %ebx +; WIN32-NEXT: imull %edi, %ebx ; WIN32-NEXT: addl %edx, %ebx -; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; WIN32-NEXT: mull %edi +; WIN32-NEXT: addl %edx, %ebx +; WIN32-NEXT: addl %esi, %eax ; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill ; WIN32-NEXT: adcl %ecx, %ebx ; WIN32-NEXT: movl %edi, %eax @@ -606,20 +602,20 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) { ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull %ebp ; WIN32-NEXT: movl %edx, %ecx -; WIN32-NEXT: movl %eax, %ebp -; WIN32-NEXT: addl %esi, %ebp +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: addl %esi, %edi ; WIN32-NEXT: adcl $0, %ecx -; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: movl %edx, %edi +; WIN32-NEXT: movl %edx, %ebp ; WIN32-NEXT: movl %eax, %esi -; WIN32-NEXT: addl %ebp, %esi -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; WIN32-NEXT: adcl %ecx, %edi +; WIN32-NEXT: addl %edi, %esi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: adcl %ecx, %ebp ; WIN32-NEXT: setb %cl -; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: movl %edi, %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: addl %edi, %eax +; WIN32-NEXT: addl %ebp, %eax ; WIN32-NEXT: movzbl %cl, %ecx ; WIN32-NEXT: adcl %ecx, %edx ; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload @@ -632,9 +628,9 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) { ; WIN32-NEXT: jne LBB12_2 ; WIN32-NEXT: # %bb.1: ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi ; WIN32-NEXT: LBB12_2: -; WIN32-NEXT: movl %ebp, %edx +; WIN32-NEXT: movl %edi, %edx ; WIN32-NEXT: addl $4, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi @@ -996,60 +992,60 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) { ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi ; WIN32-NEXT: pushl %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: movl %ecx, %edi ; WIN32-NEXT: sarl $31, %edi -; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: movl %ebp, %esi ; WIN32-NEXT: imull %edi, %esi +; WIN32-NEXT: movl %ebp, %eax ; WIN32-NEXT: mull %edi -; WIN32-NEXT: movl %eax, %ebp -; WIN32-NEXT: addl %esi, %edx -; WIN32-NEXT: movl %ebx, %esi -; WIN32-NEXT: imull %ebx, %edi +; WIN32-NEXT: movl %eax, %ebx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: imull %eax, %edi +; WIN32-NEXT: addl %esi, %edi ; WIN32-NEXT: addl %edx, %edi +; WIN32-NEXT: movl %eax, %esi ; WIN32-NEXT: sarl $31, %esi -; WIN32-NEXT: movl %esi, %ebx -; WIN32-NEXT: imull %ecx, %ebx +; WIN32-NEXT: movl %esi, %edx +; WIN32-NEXT: imull %ecx, %edx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: movl %esi, %eax -; WIN32-NEXT: mull %ecx -; WIN32-NEXT: addl %ebx, %edx ; WIN32-NEXT: imull %ecx, %esi ; WIN32-NEXT: addl %edx, %esi -; WIN32-NEXT: addl %ebp, %eax +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: addl %ebx, %eax ; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill ; WIN32-NEXT: adcl %edi, %esi ; WIN32-NEXT: movl %ecx, %eax -; WIN32-NEXT: movl %ecx, %edi -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: mull %ecx -; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %ecx, %ebx +; WIN32-NEXT: mull %ebp +; WIN32-NEXT: movl %edx, %edi ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: mull %ecx +; WIN32-NEXT: mull %ebp ; WIN32-NEXT: movl %edx, %ebp ; WIN32-NEXT: movl %eax, %ecx -; WIN32-NEXT: addl %ebx, %ecx +; WIN32-NEXT: addl %edi, %ecx ; WIN32-NEXT: adcl $0, %ebp -; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: movl %ebx, %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: movl %edx, %edi -; WIN32-NEXT: movl %eax, %ebx -; WIN32-NEXT: addl %ecx, %ebx -; WIN32-NEXT: adcl %ebp, %edi +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: addl %ecx, %edi +; WIN32-NEXT: adcl %ebp, %ebx ; WIN32-NEXT: setb %cl ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) -; WIN32-NEXT: addl %edi, %eax +; WIN32-NEXT: addl %ebx, %eax ; WIN32-NEXT: movzbl %cl, %ecx ; WIN32-NEXT: adcl %ecx, %edx ; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload ; WIN32-NEXT: adcl %esi, %edx -; WIN32-NEXT: sarl $31, %ebx -; WIN32-NEXT: xorl %ebx, %edx -; WIN32-NEXT: xorl %eax, %ebx -; WIN32-NEXT: orl %edx, %ebx +; WIN32-NEXT: sarl $31, %edi +; WIN32-NEXT: xorl %edi, %edx +; WIN32-NEXT: xorl %eax, %edi +; WIN32-NEXT: orl %edx, %edi ; WIN32-NEXT: jne LBB18_1 ; WIN32-NEXT: # %bb.3: # %continue ; WIN32-NEXT: movb $1, %al @@ -1712,20 +1708,20 @@ define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) { ; WIN32-NEXT: movl %ebx, %ecx ; WIN32-NEXT: imull %ebp, %ecx ; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: imull %esi, %ebx +; WIN32-NEXT: addl %ecx, %ebx ; WIN32-NEXT: mull %esi ; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; WIN32-NEXT: addl %ecx, %edx -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: imull %esi, %ebx ; WIN32-NEXT: addl %edx, %ebx ; WIN32-NEXT: movl %ebp, %ecx ; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl %eax, %edi ; WIN32-NEXT: imull %ecx, %edi ; WIN32-NEXT: mull %ecx -; WIN32-NEXT: addl %edi, %edx ; WIN32-NEXT: imull {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: addl %edi, %ecx ; WIN32-NEXT: addl %edx, %ecx ; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1809,55 +1805,55 @@ define zeroext i1 @smuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) { ; WIN32-NEXT: pushl %ebx ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi -; WIN32-NEXT: subl $12, %esp +; WIN32-NEXT: subl $16, %esp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl (%eax), %ebp -; WIN32-NEXT: movl 4(%eax), %ebx +; WIN32-NEXT: movl (%eax), %ebx +; WIN32-NEXT: movl 4(%eax), %ebp ; WIN32-NEXT: movl %ecx, %edi ; WIN32-NEXT: sarl $31, %edi -; WIN32-NEXT: movl %ebp, %esi +; WIN32-NEXT: movl %ebx, %esi ; WIN32-NEXT: imull %edi, %esi -; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: movl %ebx, %eax ; WIN32-NEXT: mull %edi ; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill -; WIN32-NEXT: addl %esi, %edx -; WIN32-NEXT: movl %ebx, %esi -; WIN32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; WIN32-NEXT: imull %ebx, %edi +; WIN32-NEXT: imull %ebp, %edi +; WIN32-NEXT: addl %esi, %edi ; WIN32-NEXT: addl %edx, %edi +; WIN32-NEXT: movl %ebp, %esi +; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN32-NEXT: sarl $31, %esi -; WIN32-NEXT: movl %esi, %ebx -; WIN32-NEXT: imull %ecx, %ebx +; WIN32-NEXT: movl %esi, %edx +; WIN32-NEXT: imull %ecx, %edx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: movl %esi, %eax -; WIN32-NEXT: mull %ecx -; WIN32-NEXT: addl %ebx, %edx ; WIN32-NEXT: imull %ecx, %esi ; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: addl %edx, %esi ; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload ; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN32-NEXT: adcl %edi, %esi ; WIN32-NEXT: movl %ecx, %eax -; WIN32-NEXT: mull %ebp -; WIN32-NEXT: movl %edx, %edi +; WIN32-NEXT: mull %ebx +; WIN32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: mull %ebp -; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: mull %ebx +; WIN32-NEXT: movl %edx, %edi ; WIN32-NEXT: movl %eax, %ecx -; WIN32-NEXT: addl %edi, %ecx -; WIN32-NEXT: adcl $0, %ebx +; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; WIN32-NEXT: adcl $0, %edi ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; WIN32-NEXT: movl %edx, %edi +; WIN32-NEXT: mull %ebp +; WIN32-NEXT: movl %edx, %ebx ; WIN32-NEXT: movl %eax, %ebp ; WIN32-NEXT: addl %ecx, %ebp -; WIN32-NEXT: adcl %ebx, %edi +; WIN32-NEXT: adcl %edi, %ebx ; WIN32-NEXT: setb %cl ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; WIN32-NEXT: addl %edi, %eax +; WIN32-NEXT: addl %ebx, %eax ; WIN32-NEXT: movzbl %cl, %ecx ; WIN32-NEXT: adcl %ecx, %edx ; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload @@ -1872,7 +1868,7 @@ define zeroext i1 @smuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) { ; WIN32-NEXT: movl (%esp), %ecx # 4-byte Reload ; WIN32-NEXT: movl %ecx, (%eax) ; WIN32-NEXT: setne %al -; WIN32-NEXT: addl $12, %esp +; WIN32-NEXT: addl $16, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi ; WIN32-NEXT: popl %ebx