diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 5047503f9011e..1bd370584fadb 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -806,12 +806,10 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, continue; if (Reg.isPhysical()) { - if (MO.isUse() && - (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO))) - continue; - - // Don't handle non-constant and non-ignorable physical register. - return false; + // Don't handle non-constant and non-ignorable physical register uses. + if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO)) + return false; + continue; } // Users for the defs are all dominated by SuccToSinkTo. diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index b79a729cb5fb9..a5edc2ea19362 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -861,6 +861,8 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-NEXT: .LBB5_2: ; %bb10 ; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[14:15] +; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5] +; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13] ; GCN-NEXT: s_mov_b64 s[6:7], 0 ; GCN-NEXT: s_andn2_b64 exec, exec, s[12:13] ; GCN-NEXT: s_cbranch_execz .LBB5_7 @@ -873,12 +875,10 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-NEXT: ; %bb.4: ; %bb2 ; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[6:7] -; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5] ; GCN-NEXT: s_mov_b32 s9, s8 ; GCN-NEXT: s_mov_b32 s10, s8 ; GCN-NEXT: s_mov_b32 s11, s8 ; GCN-NEXT: v_mov_b32_e32 v0, s8 -; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13] ; GCN-NEXT: v_mov_b32_e32 v1, s9 ; GCN-NEXT: v_mov_b32_e32 v2, s10 ; GCN-NEXT: v_mov_b32_e32 v3, s11 diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll index fdbd7ff8d652a..af4fcb4950b65 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll @@ -10,25 +10,26 @@ define void @needs_and(i32 %arg) { ; GCN-LABEL: needs_and: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s8, 1 +; GCN-NEXT: s_mov_b32 s10, 1 ; GCN-NEXT: s_mov_b64 s[6:7], 0 ; GCN-NEXT: s_branch .LBB0_2 ; GCN-NEXT: .LBB0_1: ; %endif ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1 -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN-NEXT: s_add_i32 s8, s8, 1 +; GCN-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-NEXT: s_and_b64 s[4:5], exec, vcc +; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7] +; GCN-NEXT: s_add_i32 s10, s10, 1 ; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execz .LBB0_4 ; GCN-NEXT: .LBB0_2: ; %loop ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 -; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s8, v0 -; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7] -; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: v_cmp_gt_u32_e64 s[4:5], s10, v0 +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s10, v0 +; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] ; GCN-NEXT: s_cbranch_execz .LBB0_1 ; GCN-NEXT: ; %bb.3: ; %then ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1 -; GCN-NEXT: s_nop 0 +; GCN-NEXT: s_nop 1 ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4 ; GCN-NEXT: s_branch .LBB0_1 ; GCN-NEXT: .LBB0_4: ; %loopexit @@ -107,13 +108,13 @@ define void @break_cond_is_arg(i32 %arg, i1 %breakcond) { ; GCN-NEXT: .LBB2_1: ; %endif ; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5] +; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] ; GCN-NEXT: s_add_i32 s10, s10, 1 ; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execz .LBB2_4 ; GCN-NEXT: .LBB2_2: ; %loop ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5] -; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s10, v0 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GCN-NEXT: s_cbranch_execz .LBB2_1 diff --git a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll index 7738a2daecc9e..2d8680ea030ce 100644 --- a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll +++ b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll @@ -13,12 +13,12 @@ define amdgpu_cs void @should_not_hoist_set_inactive(<4 x i32> inreg %i14, i32 i ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1 ; GCN-NEXT: s_waitcnt_depctr 0xffe3 ; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GCN-NEXT: s_and_b32 s8, exec_lo, s6 +; GCN-NEXT: s_or_b32 s7, s8, s7 ; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s7 ; GCN-NEXT: s_cbranch_execz .LBB0_5 ; GCN-NEXT: .LBB0_2: ; %bb ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: s_and_b32 s8, exec_lo, s6 -; GCN-NEXT: s_or_b32 s7, s8, s7 ; GCN-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GCN-NEXT: s_cbranch_execz .LBB0_1 ; GCN-NEXT: ; %bb.3: ; %bb1 diff --git a/llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir b/llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir new file mode 100644 index 0000000000000..8516cc9114c76 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir @@ -0,0 +1,57 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z15 -O3 -run-pass=machine-sink %s -o - \ +# RUN: -verify-machineinstrs | FileCheck %s +# +# Test that the AGHIK can be sunk into %bb.4. It has a def of CC, but it is dead. + +--- | + define void @fun() { ret void } +... + +# CHECK-LABEL: bb.4: +# CHECK: %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc +# CHECK-NEXT: CGHI %1, 0, implicit-def $cc +# CHECK-NEXT: BRC 14, 6, %bb.1, implicit $cc +# CHECK-NEXT: J %bb.5 + + +--- +name: fun +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64bit } + - { id: 1, class: gr64bit } + - { id: 2, class: grx32bit } + - { id: 3, class: gr64bit } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0: + + %2:grx32bit = LHIMux 0 + %3:gr64bit = IMPLICIT_DEF + + bb.1: + + %0:gr64bit = PHI %3, %bb.0, %1, %bb.4 + + bb.2: + + %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc + CHIMux %2, 0, implicit-def $cc + BRC 14, 6, %bb.4, implicit $cc + J %bb.3 + + bb.3: + + bb.4: + + CGHI %1, 0, implicit-def $cc + BRC 14, 6, %bb.1, implicit $cc + J %bb.5 + + bb.5: + Return + +... diff --git a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll index 6f9d13cde6b2e..ca839bbb0dced 100644 --- a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll +++ b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll @@ -102,17 +102,17 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5) ; CHECK-NEXT: jns .LBB0_20 ; CHECK-NEXT: .LBB0_5: # %a50b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: shrl $31, %r9d ; CHECK-NEXT: movl %eax, %r10d ; CHECK-NEXT: orl %esi, %r10d ; CHECK-NEXT: jns .LBB0_26 ; CHECK-NEXT: .LBB0_6: # %a57b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: shrl $31, %r10d +; CHECK-NEXT: shrl $31, %r9d ; CHECK-NEXT: testb %r9b, %r9b ; CHECK-NEXT: je .LBB0_30 ; CHECK-NEXT: .LBB0_7: # %a66b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: shrl $31, %r10d ; CHECK-NEXT: testb %r10b, %r10b ; CHECK-NEXT: jne .LBB0_8 ; CHECK-NEXT: .p2align 4, 0x90 diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll index dead31a8ba013..68bdb9235546b 100644 --- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll +++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll @@ -196,10 +196,8 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: ja .LBB1_14 ; CHECK-NEXT: .LBB1_7: # %vector.body.preheader ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: leaq -4(%rcx), %r8 -; CHECK-NEXT: movq %r8, %r11 -; CHECK-NEXT: shrq $2, %r11 -; CHECK-NEXT: btl $2, %r8d +; CHECK-NEXT: leaq -4(%rcx), %r11 +; CHECK-NEXT: btl $2, %r11d ; CHECK-NEXT: jb .LBB1_8 ; CHECK-NEXT: # %bb.9: # %vector.body.prol.preheader ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 @@ -208,12 +206,12 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: movdqu %xmm0, (%r13,%rbp,8) ; CHECK-NEXT: movdqu %xmm0, 16(%r13,%rbp,8) ; CHECK-NEXT: movl $4, %r10d -; CHECK-NEXT: testq %r11, %r11 +; CHECK-NEXT: shrq $2, %r11 ; CHECK-NEXT: jne .LBB1_11 ; CHECK-NEXT: jmp .LBB1_13 ; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: xorl %r10d, %r10d -; CHECK-NEXT: testq %r11, %r11 +; CHECK-NEXT: shrq $2, %r11 ; CHECK-NEXT: je .LBB1_13 ; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 diff --git a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll index 9c881dd816994..2a98e3dd91ee1 100644 --- a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll +++ b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll @@ -60,10 +60,11 @@ zero: ; CHECK: JMP_1 %bb.4 ; CHECK: bb.4 ; CHECK: bb.5 -; CHECK: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg -; CHECK-LV: %3:gr64 = COPY killed %10 -; CHECK-LIS: %3:gr64 = COPY %10 -; CHECK-LV: TEST64rr killed %1, %1, implicit-def $eflags +; CHECK: %3:gr64 = COPY %10 +; CHECK-LV: %4:gr64 = COPY killed %10 +; CHECK-LV: %4:gr64 = nuw ADD64ri8 %4, 8, implicit-def dead $eflags +; CHECK-LIS: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg +; CHECK: TEST64rr killed %1, %1, implicit-def $eflags ; CHECK: JCC_1 %bb.1, 5, implicit killed $eflags ; CHECK: JMP_1 %bb.6 define void @test2(i8 addrspace(1)* %this, i32 %0, i32 addrspace(1)* %p0, i8 addrspace(1)* %p1) gc "statepoint-example" personality i32* ()* @fake_personality_function {