Skip to content

Commit

Permalink
[MachineSink] Don't reject sinking because of dead def in isProfitableToSinkTo().
Browse files Browse the repository at this point in the history

An instruction should be sunk (if otherwise legal and profitable) regardless
of whether it has a dead def of a physreg. Physreg defs are checked in other
places, and sinking is only done with dead defs of regs that are not live into
the target MBB.

Differential Revision: https://reviews.llvm.org/D150447

Reviewed By: sebastian-ne, arsenm
  • Loading branch information
JonPsson committed May 16, 2023
1 parent a27fd12 commit 64599ac
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 32 deletions.
10 changes: 4 additions & 6 deletions llvm/lib/CodeGen/MachineSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -806,12 +806,10 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
continue;

if (Reg.isPhysical()) {
if (MO.isUse() &&
(MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
continue;

// Don't handle non-constant and non-ignorable physical register.
return false;
// Don't handle non-constant and non-ignorable physical register uses.
if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
return false;
continue;
}

// Users for the defs are all dominated by SuccToSinkTo.
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,8 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-NEXT: .LBB5_2: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[14:15]
; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_andn2_b64 exec, exec, s[12:13]
; GCN-NEXT: s_cbranch_execz .LBB5_7
Expand All @@ -873,12 +875,10 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-NEXT: ; %bb.4: ; %bb2
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5]
; GCN-NEXT: s_mov_b32 s9, s8
; GCN-NEXT: s_mov_b32 s10, s8
; GCN-NEXT: s_mov_b32 s11, s8
; GCN-NEXT: v_mov_b32_e32 v0, s8
; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13]
; GCN-NEXT: v_mov_b32_e32 v1, s9
; GCN-NEXT: v_mov_b32_e32 v2, s10
; GCN-NEXT: v_mov_b32_e32 v3, s11
Expand Down
21 changes: 11 additions & 10 deletions llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,26 @@ define void @needs_and(i32 %arg) {
; GCN-LABEL: needs_and:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s8, 1
; GCN-NEXT: s_mov_b32 s10, 1
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_branch .LBB0_2
; GCN-NEXT: .LBB0_1: ; %endif
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_add_i32 s8, s8, 1
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_b64 s[4:5], exec, vcc
; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7]
; GCN-NEXT: s_add_i32 s10, s10, 1
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execz .LBB0_4
; GCN-NEXT: .LBB0_2: ; %loop
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s8, v0
; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7]
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: v_cmp_gt_u32_e64 s[4:5], s10, v0
; GCN-NEXT: v_cmp_le_u32_e32 vcc, s10, v0
; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: ; %bb.3: ; %then
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_nop 1
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT: s_branch .LBB0_1
; GCN-NEXT: .LBB0_4: ; %loopexit
Expand Down Expand Up @@ -107,13 +108,13 @@ define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
; GCN-NEXT: .LBB2_1: ; %endif
; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
; GCN-NEXT: s_add_i32 s10, s10, 1
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execz .LBB2_4
; GCN-NEXT: .LBB2_2: ; %loop
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s10, v0
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_cbranch_execz .LBB2_1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ define amdgpu_cs void @should_not_hoist_set_inactive(<4 x i32> inreg %i14, i32 i
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_waitcnt_depctr 0xffe3
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s8
; GCN-NEXT: s_and_b32 s8, exec_lo, s6
; GCN-NEXT: s_or_b32 s7, s8, s7
; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
; GCN-NEXT: s_cbranch_execz .LBB0_5
; GCN-NEXT: .LBB0_2: ; %bb
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_and_b32 s8, exec_lo, s6
; GCN-NEXT: s_or_b32 s7, s8, s7
; GCN-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: ; %bb.3: ; %bb1
Expand Down
57 changes: 57 additions & 0 deletions llvm/test/CodeGen/SystemZ/machinesink-dead-cc.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z15 -O3 -run-pass=machine-sink %s -o - \
# RUN: -verify-machineinstrs | FileCheck %s
#
# Test that the AGHIK can be sunk into %bb.4. It has a def of CC, but it is dead.

--- |
define void @fun() { ret void }
...

# CHECK-LABEL: bb.4:
# CHECK: %1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
# CHECK-NEXT: CGHI %1, 0, implicit-def $cc
# CHECK-NEXT: BRC 14, 6, %bb.1, implicit $cc
# CHECK-NEXT: J %bb.5


---
name: fun
alignment: 16
tracksRegLiveness: true
registers:
- { id: 0, class: gr64bit }
- { id: 1, class: gr64bit }
- { id: 2, class: grx32bit }
- { id: 3, class: gr64bit }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.0:
%2:grx32bit = LHIMux 0
%3:gr64bit = IMPLICIT_DEF
bb.1:
%0:gr64bit = PHI %3, %bb.0, %1, %bb.4
bb.2:
%1:gr64bit = nsw AGHIK %0, -4, implicit-def dead $cc
CHIMux %2, 0, implicit-def $cc
BRC 14, 6, %bb.4, implicit $cc
J %bb.3
bb.3:
bb.4:
CGHI %1, 0, implicit-def $cc
BRC 14, 6, %bb.1, implicit $cc
J %bb.5
bb.5:
Return
...
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,17 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
; CHECK-NEXT: jns .LBB0_20
; CHECK-NEXT: .LBB0_5: # %a50b
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: shrl $31, %r9d
; CHECK-NEXT: movl %eax, %r10d
; CHECK-NEXT: orl %esi, %r10d
; CHECK-NEXT: jns .LBB0_26
; CHECK-NEXT: .LBB0_6: # %a57b
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: shrl $31, %r10d
; CHECK-NEXT: shrl $31, %r9d
; CHECK-NEXT: testb %r9b, %r9b
; CHECK-NEXT: je .LBB0_30
; CHECK-NEXT: .LBB0_7: # %a66b
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: shrl $31, %r10d
; CHECK-NEXT: testb %r10b, %r10b
; CHECK-NEXT: jne .LBB0_8
; CHECK-NEXT: .p2align 4, 0x90
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,8 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: ja .LBB1_14
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: leaq -4(%rcx), %r8
; CHECK-NEXT: movq %r8, %r11
; CHECK-NEXT: shrq $2, %r11
; CHECK-NEXT: btl $2, %r8d
; CHECK-NEXT: leaq -4(%rcx), %r11
; CHECK-NEXT: btl $2, %r11d
; CHECK-NEXT: jb .LBB1_8
; CHECK-NEXT: # %bb.9: # %vector.body.prol.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
Expand All @@ -208,12 +206,12 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: movdqu %xmm0, (%r13,%rbp,8)
; CHECK-NEXT: movdqu %xmm0, 16(%r13,%rbp,8)
; CHECK-NEXT: movl $4, %r10d
; CHECK-NEXT: testq %r11, %r11
; CHECK-NEXT: shrq $2, %r11
; CHECK-NEXT: jne .LBB1_11
; CHECK-NEXT: jmp .LBB1_13
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: xorl %r10d, %r10d
; CHECK-NEXT: testq %r11, %r11
; CHECK-NEXT: shrq $2, %r11
; CHECK-NEXT: je .LBB1_13
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
Expand Down
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ zero:
; CHECK: JMP_1 %bb.4
; CHECK: bb.4
; CHECK: bb.5
; CHECK: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
; CHECK-LV: %3:gr64 = COPY killed %10
; CHECK-LIS: %3:gr64 = COPY %10
; CHECK-LV: TEST64rr killed %1, %1, implicit-def $eflags
; CHECK: %3:gr64 = COPY %10
; CHECK-LV: %4:gr64 = COPY killed %10
; CHECK-LV: %4:gr64 = nuw ADD64ri8 %4, 8, implicit-def dead $eflags
; CHECK-LIS: %4:gr64 = LEA64r %10, 1, $noreg, 8, $noreg
; CHECK: TEST64rr killed %1, %1, implicit-def $eflags
; CHECK: JCC_1 %bb.1, 5, implicit killed $eflags
; CHECK: JMP_1 %bb.6
define void @test2(i8 addrspace(1)* %this, i32 %0, i32 addrspace(1)* %p0, i8 addrspace(1)* %p1) gc "statepoint-example" personality i32* ()* @fake_personality_function {
Expand Down

0 comments on commit 64599ac

Please sign in to comment.