diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp index 5f3789050b813..e43e459f4f247 100644 --- a/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/llvm/lib/CodeGen/RegisterPressure.cpp @@ -477,8 +477,8 @@ class RegisterOperandsCollector { collectOperand(*OperI); // Remove redundant physreg dead defs. - for (const VRegMaskOrUnit &P : RegOpers.Defs) - removeRegLanes(RegOpers.DeadDefs, P); + for (const VRegMaskOrUnit &P : RegOpers.DeadDefs) + removeRegLanes(RegOpers.Defs, P); } void collectInstrLanes(const MachineInstr &MI) const { @@ -486,8 +486,8 @@ class RegisterOperandsCollector { collectOperandLanes(*OperI); // Remove redundant physreg dead defs. - for (const VRegMaskOrUnit &P : RegOpers.Defs) - removeRegLanes(RegOpers.DeadDefs, P); + for (const VRegMaskOrUnit &P : RegOpers.DeadDefs) + removeRegLanes(RegOpers.Defs, P); } /// Push this operand's register onto the correct vectors. diff --git a/llvm/test/CodeGen/X86/dead-register-pr76416.mir b/llvm/test/CodeGen/X86/dead-register-pr76416.mir new file mode 100644 index 0000000000000..f20080fcfb87d --- /dev/null +++ b/llvm/test/CodeGen/X86/dead-register-pr76416.mir @@ -0,0 +1,32 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=x86_64-unknown-unknown -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s + +--- +name: vga_load_state +tracksRegLiveness: true +body: | + bb.0: + liveins: $rdi + + ; CHECK-LABEL: name: vga_load_state + ; CHECK: liveins: $rdi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: dead undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_nosp = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax + ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags + ; 
CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax + %0:gr64 = COPY $rdi + undef %1.sub_32bit:gr64_nosp = MOV32r0 implicit-def dead $eflags + dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax + INLINEASM &"", 1 /* sideeffect attdialect */ + dead $eax = MOV32r0 implicit-def dead $eflags + INLINEASM &"", 1 /* sideeffect attdialect */ + dead $eax = MOV32r0 implicit-def dead $eflags + dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax + +... + diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll index 859e9244d29d2..5c8784cf19338 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll @@ -776,145 +776,157 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, ptr %divdst) nounwi define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y, ptr %divdst) nounwind { ; X86-LABEL: vector_i128_i16: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl %eax ; X86-NEXT: pextrw $7, %xmm0, %eax -; X86-NEXT: pextrw $7, %xmm1, %esi +; X86-NEXT: pextrw $7, %xmm1, %ecx ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: divw %si -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: movd %eax, %xmm2 +; X86-NEXT: divw %cx +; X86-NEXT: movl %eax, %esi ; X86-NEXT: pextrw $6, %xmm0, %eax -; X86-NEXT: pextrw $6, %xmm1, %esi +; X86-NEXT: pextrw $6, %xmm1, %ecx ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: divw %si +; X86-NEXT: divw %cx ; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: movd %eax, %xmm3 -; X86-NEXT: punpcklwd {{.*#+}} xmm3 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: pextrw $5, %xmm0, %eax -; X86-NEXT: pextrw $5, %xmm1, %esi +; X86-NEXT: pextrw $5, %xmm1, %ecx ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: divw %si -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: movd %eax, %xmm4 +; X86-NEXT: divw %cx +; X86-NEXT: movl %eax, %edi ; X86-NEXT: pextrw $4, %xmm0, %eax -; X86-NEXT: pextrw $4, %xmm1, %esi +; X86-NEXT: pextrw $4, %xmm1, %ecx ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: divw %si -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: movd %eax, %xmm2 -; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X86-NEXT: divw %cx +; X86-NEXT: movl %eax, %ebx ; X86-NEXT: pextrw $3, %xmm0, %eax -; X86-NEXT: pextrw $3, %xmm1, %esi +; X86-NEXT: pextrw $3, %xmm1, %ecx ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: divw %si -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: movd %eax, %xmm4 +; X86-NEXT: divw %cx +; X86-NEXT: movl %eax, %ebp ; X86-NEXT: pextrw $2, %xmm0, %eax -; X86-NEXT: pextrw $2, %xmm1, %esi +; X86-NEXT: pextrw $2, %xmm1, %ecx ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: divw %si -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: movd %eax, %xmm3 -; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; X86-NEXT: divw %cx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movd %esi, %xmm3 ; X86-NEXT: pextrw $1, %xmm0, %eax ; X86-NEXT: pextrw $1, %xmm1, %esi ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: divw %si ; X86-NEXT: # kill: def $ax killed $ax def $eax +; X86-NEXT: 
movd (%esp), %xmm4 # 4-byte Folded Reload +; X86-NEXT: # xmm4 = mem[0],zero,zero,zero +; X86-NEXT: movd %edi, %xmm5 +; X86-NEXT: movd %ebx, %xmm2 +; X86-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; X86-NEXT: movd %ebp, %xmm6 +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3] +; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; X86-NEXT: movd %ecx, %xmm3 +; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3] ; X86-NEXT: movd %eax, %xmm4 ; X86-NEXT: movd %xmm0, %eax -; X86-NEXT: movd %xmm1, %esi +; X86-NEXT: movd %xmm1, %ecx ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: divw %si +; X86-NEXT: divw %cx ; X86-NEXT: # kill: def $ax killed $ax def $eax ; X86-NEXT: movd %eax, %xmm5 ; X86-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] ; X86-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] ; X86-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0] -; X86-NEXT: movdqa %xmm5, (%ecx) +; X86-NEXT: movdqa %xmm5, (%esi) ; X86-NEXT: pmullw %xmm1, %xmm5 ; X86-NEXT: psubw %xmm5, %xmm0 +; X86-NEXT: addl $4, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: vector_i128_i16: ; X64: # %bb.0: +; X64-NEXT: pushq %rbx ; X64-NEXT: pextrw $7, %xmm0, %eax ; X64-NEXT: pextrw $7, %xmm1, %ecx ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divw %cx -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: movd %eax, %xmm2 +; X64-NEXT: movl %eax, %ecx ; X64-NEXT: pextrw $6, %xmm0, %eax -; X64-NEXT: pextrw $6, %xmm1, %ecx +; X64-NEXT: pextrw $6, %xmm1, %esi ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divw %cx -; 
X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: movd %eax, %xmm3 -; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-NEXT: divw %si +; X64-NEXT: movl %eax, %esi ; X64-NEXT: pextrw $5, %xmm0, %eax -; X64-NEXT: pextrw $5, %xmm1, %ecx +; X64-NEXT: pextrw $5, %xmm1, %r8d ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divw %cx -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: movd %eax, %xmm4 +; X64-NEXT: divw %r8w +; X64-NEXT: movl %eax, %r8d ; X64-NEXT: pextrw $4, %xmm0, %eax -; X64-NEXT: pextrw $4, %xmm1, %ecx +; X64-NEXT: pextrw $4, %xmm1, %r9d ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divw %cx -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: movd %eax, %xmm2 -; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X64-NEXT: divw %r9w +; X64-NEXT: movl %eax, %r9d ; X64-NEXT: pextrw $3, %xmm0, %eax -; X64-NEXT: pextrw $3, %xmm1, %ecx +; X64-NEXT: pextrw $3, %xmm1, %r10d ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divw %cx -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: movd %eax, %xmm3 +; X64-NEXT: divw %r10w +; X64-NEXT: movl %eax, %r10d ; X64-NEXT: pextrw $2, %xmm0, %eax -; X64-NEXT: pextrw $2, %xmm1, %ecx +; X64-NEXT: pextrw $2, %xmm1, %r11d ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divw %cx -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: movd %eax, %xmm4 -; X64-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; X64-NEXT: divw %r11w +; X64-NEXT: movl %eax, %r11d ; X64-NEXT: pextrw $1, %xmm0, %eax -; X64-NEXT: pextrw $1, %xmm1, %ecx +; X64-NEXT: pextrw $1, %xmm1, %ebx ; X64-NEXT: # kill: def 
$ax killed $ax killed $eax ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divw %cx +; X64-NEXT: divw %bx ; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: movd %eax, %xmm3 +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: movd %r8d, %xmm4 +; X64-NEXT: movd %r9d, %xmm5 +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-NEXT: movd %r10d, %xmm2 +; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] +; X64-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] +; X64-NEXT: movd %r11d, %xmm3 +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-NEXT: movd %eax, %xmm2 ; X64-NEXT: movd %xmm0, %eax ; X64-NEXT: movd %xmm1, %ecx ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divw %cx ; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: movd %eax, %xmm5 -; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] -; X64-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] -; X64-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0] -; X64-NEXT: movdqa %xmm5, (%rdi) -; X64-NEXT: pmullw %xmm1, %xmm5 -; X64-NEXT: psubw %xmm5, %xmm0 +; X64-NEXT: movd %eax, %xmm4 +; X64-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; X64-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0] +; X64-NEXT: movdqa %xmm4, (%rdi) +; X64-NEXT: pmullw %xmm1, %xmm4 +; X64-NEXT: psubw %xmm4, %xmm0 +; X64-NEXT: popq %rbx ; X64-NEXT: retq %div = udiv <8 x i16> %x, %y store <8 x i16> %div, ptr %divdst, align 16 diff --git a/llvm/test/CodeGen/X86/inline-asm-pr155807.ll b/llvm/test/CodeGen/X86/inline-asm-pr155807.ll new file mode 100644 index 0000000000000..3c10773dbb66c --- /dev/null +++ 
b/llvm/test/CodeGen/X86/inline-asm-pr155807.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s + +; Live Variable Analysis transforms (inlineasm rax, eax) to (inlineasm dead rax, eax) +; instead of (inlineasm dead rax, dead eax). RegisterPressure fails to consider +; eax a dead register. +define i64 @pr155807(i64 %vecext) { +; CHECK-LABEL: pr155807: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movabsq $705425148255374, %rax # imm = 0x2819497609C8E +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: idivq %rdi +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: retq +entry: + tail call void asm sideeffect "", "~{rax},~{eax},~{dirflag},~{fpsr},~{flags}"() + %rem = srem i64 705425148255374, %vecext + ret i64 %rem +} diff --git a/llvm/test/CodeGen/X86/inline-asm-pr76416.ll b/llvm/test/CodeGen/X86/inline-asm-pr76416.ll new file mode 100644 index 0000000000000..16b27de394bfa --- /dev/null +++ b/llvm/test/CodeGen/X86/inline-asm-pr76416.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -O2 -mtriple=x86_64 < %s | FileCheck %s +%struct.anon.5.28.78.99.149.119 = type { [4 x i8] } + +@vga_load_state_p = external dso_local global ptr, align 8 +@vga_load_state_data = external dso_local global i8, align 1 + +define dso_local void @vga_load_state() #0 { +; CHECK-LABEL: vga_load_state: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: jg .LBB0_3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: incl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: jle .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %for.end +; 
CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_4: # %for.cond1 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movq vga_load_state_p(%rip), %rax +; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: movzbl (%rax,%rcx), %eax +; CHECK-NEXT: movb %al, vga_load_state_data(%rip) +; CHECK-NEXT: leal 1(%rcx), %eax +; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: jmp .LBB0_4 +entry: + %i = alloca i32, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 4 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) #2 + %1 = load i32, ptr %i, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + store i32 0, ptr %i, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.cond1, %for.end + call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) #2 + %2 = load ptr, ptr @vga_load_state_p, align 8 + %regs = getelementptr inbounds %struct.anon.5.28.78.99.149.119, ptr %2, i32 0, i32 0 + %3 = load i32, ptr %i, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom + %4 = load i8, ptr %arrayidx, align 1 + store i8 %4, ptr @vga_load_state_data, align 1 + %5 = load i32, ptr %i, align 4 + %inc5 = add nsw i32 %5, 1 + store i32 %inc5, ptr %i, align 4 + br label %for.cond1, !llvm.loop !1 +} + +!1 = distinct !{!1, !2} +!2 = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll index 1d98b4f62069d..d50d8ed7aaf6a 100644 --- a/llvm/test/CodeGen/X86/scalar_widen_div.ll +++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll @@ -150,15 +150,15 @@ define <4 x i16> 
@test_ushort_div(<4 x i16> %num, <4 x i16> %div) { ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divw %si -; CHECK-NEXT: # kill: def $ax killed $ax def $eax -; CHECK-NEXT: movd %eax, %xmm2 -; CHECK-NEXT: pinsrw $1, %ecx, %xmm2 +; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: pextrw $2, %xmm0, %eax -; CHECK-NEXT: pextrw $2, %xmm1, %ecx +; CHECK-NEXT: pextrw $2, %xmm1, %edi ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: divw %cx +; CHECK-NEXT: divw %di ; CHECK-NEXT: # kill: def $ax killed $ax def $eax +; CHECK-NEXT: movd %esi, %xmm2 +; CHECK-NEXT: pinsrw $1, %ecx, %xmm2 ; CHECK-NEXT: pinsrw $2, %eax, %xmm2 ; CHECK-NEXT: pextrw $3, %xmm0, %eax ; CHECK-NEXT: pextrw $3, %xmm1, %ecx diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll index 5baf7a1dac74c..8b96a8050e65e 100644 --- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll @@ -105,9 +105,9 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mulb {{[0-9]+}}(%esp) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: cmpb %al, %dl ; X86-NEXT: setns %cl @@ -140,9 +140,9 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind { define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind { ; X86-LABEL: func4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mulb {{[0-9]+}}(%esp) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: shlb $4, %al ; X86-NEXT: sarb $4, %al ; X86-NEXT: subb %al, %cl diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll 
b/llvm/test/CodeGen/X86/udiv_fix.ll index 82dfeeee13293..f49dec5714ff0 100644 --- a/llvm/test/CodeGen/X86/udiv_fix.ll +++ b/llvm/test/CodeGen/X86/udiv_fix.ll @@ -243,28 +243,30 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movdqa %xmm1, %xmm3 ; X64-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3] ; X64-NEXT: movq %xmm3, %rcx -; X64-NEXT: movdqa %xmm0, %xmm4 -; X64-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm2[2],xmm4[3],xmm2[3] -; X64-NEXT: psllq $31, %xmm4 -; X64-NEXT: movq %xmm4, %rax +; X64-NEXT: movdqa %xmm0, %xmm3 +; X64-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-NEXT: psllq $31, %xmm3 +; X64-NEXT: movq %xmm3, %rax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divq %rcx -; X64-NEXT: movq %rax, %xmm3 -; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] -; X64-NEXT: movq %xmm4, %rax -; X64-NEXT: movdqa %xmm1, %xmm4 -; X64-NEXT: psrldq {{.*#+}} xmm4 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X64-NEXT: movq %xmm4, %rcx +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] +; X64-NEXT: movq %xmm3, %rax +; X64-NEXT: movdqa %xmm1, %xmm3 +; X64-NEXT: psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: movq %xmm3, %rsi ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divq %rcx -; X64-NEXT: movq %rax, %xmm4 -; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; X64-NEXT: divq %rsi +; X64-NEXT: movq %rax, %rsi ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X64-NEXT: psllq $31, %xmm0 ; X64-NEXT: movq %xmm0, %rax -; X64-NEXT: movd %xmm1, %ecx +; X64-NEXT: movd %xmm1, %edi ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divq %rcx +; X64-NEXT: divq %rdi +; X64-NEXT: movq %rcx, %xmm3 +; X64-NEXT: movq %rsi, %xmm2 +; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; X64-NEXT: movq %rax, %xmm2 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-NEXT: movq %xmm0, %rax 
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll index 3da5973f9f903..ad95deabb2e55 100644 --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -320,59 +320,61 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %xmm4, %rcx ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divq %rcx -; X64-NEXT: movq %rax, %xmm8 +; X64-NEXT: movq %rax, %rcx ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] ; X64-NEXT: movq %xmm3, %rax ; X64-NEXT: movdqa %xmm1, %xmm3 ; X64-NEXT: psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X64-NEXT: movq %xmm3, %rcx +; X64-NEXT: movq %xmm3, %rsi ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divq %rcx -; X64-NEXT: movq %rax, %xmm3 -; X64-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm3[0] -; X64-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] -; X64-NEXT: movdqa %xmm8, %xmm3 -; X64-NEXT: pxor %xmm4, %xmm3 -; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,1,3,3] -; X64-NEXT: movdqa {{.*#+}} xmm7 = [2147483649,2147483649,2147483649,2147483649] -; X64-NEXT: pcmpeqd %xmm7, %xmm6 -; X64-NEXT: movdqa {{.*#+}} xmm5 = [9223372043297226751,9223372043297226751] -; X64-NEXT: movdqa %xmm5, %xmm9 -; X64-NEXT: pcmpgtd %xmm3, %xmm9 -; X64-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] -; X64-NEXT: pand %xmm6, %xmm10 -; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3] -; X64-NEXT: por %xmm10, %xmm3 -; X64-NEXT: pcmpeqd %xmm6, %xmm6 -; X64-NEXT: pand %xmm3, %xmm8 -; X64-NEXT: pxor %xmm6, %xmm3 -; X64-NEXT: por %xmm8, %xmm3 -; X64-NEXT: psrlq $1, %xmm3 +; X64-NEXT: divq %rsi +; X64-NEXT: movq %rax, %rsi ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; X64-NEXT: movq %xmm2, %rax -; X64-NEXT: movd %xmm1, %ecx +; X64-NEXT: movd %xmm1, %edi ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divq %rcx -; X64-NEXT: movq %rax, %xmm8 -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] -; X64-NEXT: movq 
%xmm0, %rax +; X64-NEXT: divq %rdi +; X64-NEXT: movq %rcx, %xmm7 +; X64-NEXT: movq %rsi, %xmm0 +; X64-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm0[0] +; X64-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456] +; X64-NEXT: movdqa %xmm7, %xmm3 +; X64-NEXT: pxor %xmm0, %xmm3 +; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; X64-NEXT: movdqa {{.*#+}} xmm6 = [2147483649,2147483649,2147483649,2147483649] +; X64-NEXT: pcmpeqd %xmm6, %xmm4 +; X64-NEXT: movdqa {{.*#+}} xmm5 = [9223372043297226751,9223372043297226751] +; X64-NEXT: movdqa %xmm5, %xmm8 +; X64-NEXT: pcmpgtd %xmm3, %xmm8 +; X64-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] +; X64-NEXT: pand %xmm4, %xmm9 +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm8[1,1,3,3] +; X64-NEXT: por %xmm9, %xmm3 +; X64-NEXT: pcmpeqd %xmm4, %xmm4 +; X64-NEXT: pand %xmm3, %xmm7 +; X64-NEXT: pxor %xmm4, %xmm3 +; X64-NEXT: por %xmm7, %xmm3 +; X64-NEXT: psrlq $1, %xmm3 +; X64-NEXT: movq %rax, %xmm7 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; X64-NEXT: movq %xmm2, %rax ; X64-NEXT: psrlq $32, %xmm1 ; X64-NEXT: movq %xmm1, %rcx ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divq %rcx -; X64-NEXT: movq %rax, %xmm0 -; X64-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm0[0] -; X64-NEXT: pxor %xmm8, %xmm4 -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] -; X64-NEXT: pcmpeqd %xmm7, %xmm0 -; X64-NEXT: pcmpgtd %xmm4, %xmm5 -; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2] -; X64-NEXT: pand %xmm0, %xmm1 +; X64-NEXT: movq %rax, %xmm1 +; X64-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm1[0] +; X64-NEXT: pxor %xmm7, %xmm0 +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; X64-NEXT: pcmpeqd %xmm6, %xmm1 +; X64-NEXT: pcmpgtd %xmm0, %xmm5 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2] +; X64-NEXT: pand %xmm1, %xmm2 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3] -; X64-NEXT: por %xmm1, %xmm0 -; X64-NEXT: pxor %xmm0, %xmm6 -; X64-NEXT: pand %xmm8, %xmm0 -; X64-NEXT: por %xmm6, %xmm0 +; X64-NEXT: por %xmm2, %xmm0 +; X64-NEXT: pxor %xmm0, 
%xmm4 +; X64-NEXT: pand %xmm7, %xmm0 +; X64-NEXT: por %xmm4, %xmm0 ; X64-NEXT: psrlq $1, %xmm0 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/usub_sat_plus.ll b/llvm/test/CodeGen/X86/usub_sat_plus.ll index 0fb14ad5cf7b0..725de4401cb87 100644 --- a/llvm/test/CodeGen/X86/usub_sat_plus.ll +++ b/llvm/test/CodeGen/X86/usub_sat_plus.ll @@ -82,9 +82,9 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mulb {{[0-9]+}}(%esp) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: subb %al, %cl ; X86-NEXT: movzbl %cl, %eax @@ -111,9 +111,9 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind { define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind { ; X86-LABEL: func4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mulb {{[0-9]+}}(%esp) +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: andb $15, %al ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: subb %al, %cl diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll index a076d0d762aa3..657547800075c 100644 --- a/llvm/test/CodeGen/X86/xmulo.ll +++ b/llvm/test/CodeGen/X86/xmulo.ll @@ -90,9 +90,9 @@ define zeroext i1 @smuloi8(i8 %v1, i8 %v2, ptr %res) { ; ; WIN32-LABEL: smuloi8: ; WIN32: # %bb.0: -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: imulb {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) ; WIN32-NEXT: movl %ecx, %eax @@ -303,9 +303,9 @@ define zeroext i1 @umuloi8(i8 %v1, i8 %v2, ptr %res) { ; ; WIN32-LABEL: umuloi8: ; WIN32: # %bb.0: -; 
WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: mulb {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) ; WIN32-NEXT: movl %ecx, %eax @@ -1386,10 +1386,10 @@ define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { ; ; WIN32-LABEL: smuloi8_load: ; WIN32: # %bb.0: -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movzbl (%eax), %eax ; WIN32-NEXT: imulb {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) ; WIN32-NEXT: movl %ecx, %eax @@ -1435,10 +1435,10 @@ define zeroext i1 @smuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) { ; ; WIN32-LABEL: smuloi8_load2: ; WIN32: # %bb.0: -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: imulb (%ecx) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) ; WIN32-NEXT: movl %ecx, %eax @@ -1829,10 +1829,10 @@ define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { ; ; WIN32-LABEL: umuloi8_load: ; WIN32: # %bb.0: -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movzbl (%eax), %eax ; WIN32-NEXT: mulb {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) ; WIN32-NEXT: movl %ecx, %eax @@ -1878,10 +1878,10 @@ define zeroext i1 @umuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) { ; ; WIN32-LABEL: umuloi8_load2: ; WIN32: # %bb.0: -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: mulb (%ecx) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) ; WIN32-NEXT: movl %ecx, %eax