Skip to content

Commit

Permalink
[MCP] Remove dead copies from basic blocks with successors. (#86973)
Browse files Browse the repository at this point in the history
Previously we wouldn't remove dead copies from basic blocks with
successors. The comment said we didn't want to trust the live-in lists.
The comment is very old so I'm not sure if that's still a concern today.

This patch checks the live-in lists and removes copies from
MaybeDeadCopies if they are referenced by any live-ins in any
successors. We only do this if the tracksLiveness property is set. If
that property is not set, we retain the old behavior.
  • Loading branch information
topperc committed Mar 28, 2024
1 parent 62d6beb commit 23d45e5
Show file tree
Hide file tree
Showing 13 changed files with 40 additions and 43 deletions.
32 changes: 28 additions & 4 deletions llvm/lib/CodeGen/MachineCopyPropagation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ class MachineCopyPropagation : public MachineFunctionPass {
typedef enum { DebugUse = false, RegularUse = true } DebugType;

void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
void readSuccessorLiveIns(const MachineBasicBlock &MBB);
void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
void EliminateSpillageCopies(MachineBasicBlock &MBB);
Expand Down Expand Up @@ -463,6 +464,22 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
}
}

/// Rescue copies whose results are needed by a successor of \p MBB.
///
/// Walks the live-in list of every successor block and, for each register
/// unit of each live-in physical register, asks the copy tracker for the copy
/// associated with that unit. Any copy found is dropped from MaybeDeadCopies
/// so that it is no longer a candidate for deletion.
void MachineCopyPropagation::readSuccessorLiveIns(
    const MachineBasicBlock &MBB) {
  // With no deletion candidates on entry there is nothing to rescue.
  if (MaybeDeadCopies.empty())
    return;

  // A copy whose result is live-in to a successor is not dead.
  for (const MachineBasicBlock *Successor : MBB.successors())
    for (const auto &LiveIn : Successor->liveins())
      for (MCRegUnit RegUnit : TRI->regunits(LiveIn.PhysReg)) {
        MachineInstr *TrackedCopy = Tracker.findCopyForUnit(RegUnit, *TRI);
        if (TrackedCopy)
          MaybeDeadCopies.remove(TrackedCopy);
      }
}

/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
/// This fact may have been obscured by sub register usage or may not be true at
/// all even though Src and Def are subregisters of the registers used in
Expand Down Expand Up @@ -914,10 +931,17 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
}

// If MBB doesn't have successors, delete the copies whose defs are not used.
// If MBB does have successors, then conservatively assume the defs are live-out
// since we don't want to trust live-in lists.
if (MBB.succ_empty()) {
bool TracksLiveness = MRI->tracksLiveness();

// If liveness is tracked, we can use the live-in lists to know which
// copies aren't dead.
if (TracksLiveness)
readSuccessorLiveIns(MBB);

// If MBB doesn't have successors, delete copies whose defs are not used.
// If MBB does have successors, we can only delete copies if we are able to
// use liveness information from successors to confirm they are really dead.
if (MBB.succ_empty() || TracksLiveness) {
for (MachineInstr *MaybeDead : MaybeDeadCopies) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0, implicit $w0
; CHECK-NEXT: $w8 = ORRWrs $wzr, $w0, 0, implicit-def $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,10 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: mov v19.16b, v23.16b
; CHECK-NEXT: mov v3.d[1], x20
; CHECK-NEXT: mov v23.16b, v27.16b
; CHECK-NEXT: mov v27.16b, v9.16b
; CHECK-NEXT: mul x15, x4, x5
; CHECK-NEXT: add v27.2d, v9.2d, v1.2d
; CHECK-NEXT: mul x15, x4, x5
; CHECK-NEXT: str q11, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mov v11.16b, v15.16b
; CHECK-NEXT: mov v4.d[1], x22
; CHECK-NEXT: add v19.2d, v19.2d, v1.2d
; CHECK-NEXT: add v7.2d, v7.2d, v1.2d
Expand All @@ -171,9 +171,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: mov v10.16b, v26.16b
; CHECK-NEXT: mov v14.d[1], x13
; CHECK-NEXT: mov v22.16b, v31.16b
; CHECK-NEXT: mov v20.16b, v8.16b
; CHECK-NEXT: ldp q26, q31, [sp] // 32-byte Folded Reload
; CHECK-NEXT: mov v11.16b, v15.16b
; CHECK-NEXT: mov v0.d[1], x12
; CHECK-NEXT: add v13.2d, v13.2d, v14.2d
; CHECK-NEXT: add v31.2d, v31.2d, v14.2d
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,6 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX1032-NEXT: s_cbranch_execz .LBB1_3
; GFX1032-NEXT: ; %bb.2:
; GFX1032-NEXT: v_mov_b32_e32 v0, s11
; GFX1032-NEXT: s_mov_b32 s10, s11
; GFX1032-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
; GFX1032-NEXT: .LBB1_3:
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
Expand Down Expand Up @@ -615,7 +614,6 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX1132-NEXT: s_cbranch_execz .LBB1_3
; GFX1132-NEXT: ; %bb.2:
; GFX1132-NEXT: v_mov_b32_e32 v0, s11
; GFX1132-NEXT: s_mov_b32 s10, s11
; GFX1132-NEXT: buffer_atomic_add_u32 v0, off, s[4:7], 0 glc
; GFX1132-NEXT: .LBB1_3:
; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s9
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
; CHECK-NEXT: nop
; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3
; CHECK-NEXT: cmpwi 3, 0
; CHECK-NEXT: crmove 20, 10
; CHECK-NEXT: crorc 20, 10, 2
; CHECK-NEXT: crmove 21, 2
; CHECK-NEXT: bc 4, 20, .LBB0_4
; CHECK-NEXT: # %bb.2: # %if.end5
; CHECK-NEXT: addis 3, 2, .L.str@toc@ha
Expand Down Expand Up @@ -76,11 +74,9 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
; BE-NEXT: addi 3, 31, 128
; BE-NEXT: bl _setjmp
; BE-NEXT: nop
; BE-NEXT: crmove 20, 10
; BE-NEXT: # kill: def $r3 killed $r3 killed $x3
; BE-NEXT: cmpwi 3, 0
; BE-NEXT: crorc 20, 10, 2
; BE-NEXT: crmove 21, 2
; BE-NEXT: bc 4, 20, .LBB0_4
; BE-NEXT: # %bb.2: # %if.end5
; BE-NEXT: addis 3, 2, .L.str@toc@ha
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ define i64 @loopif(ptr nocapture readonly %x, i32 %y, i32 %n) {
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: blt .LBB1_4
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
; CHECK-NEXT: mov lr, r2
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: dls lr, r2
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .p2align 2
Expand Down
16 changes: 4 additions & 12 deletions llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
Original file line number Diff line number Diff line change
Expand Up @@ -542,9 +542,7 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_simple(ptr noalias nocapture reado
; CHECK-NEXT: .pad #28
; CHECK-NEXT: sub sp, #28
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: strd r1, r2, [sp, #4] @ 8-byte Folded Spill
; CHECK-NEXT: blt .LBB11_5
; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
Expand Down Expand Up @@ -661,9 +659,7 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_complex(ptr noalias nocapture read
; CHECK-NEXT: .pad #136
; CHECK-NEXT: sub sp, #136
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: str r1, [sp, #64] @ 4-byte Spill
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: str r2, [sp, #68] @ 4-byte Spill
; CHECK-NEXT: strd r1, r2, [sp, #64] @ 8-byte Folded Spill
; CHECK-NEXT: blt.w .LBB12_5
; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
; CHECK-NEXT: ldr r1, [sp, #68] @ 4-byte Reload
Expand Down Expand Up @@ -952,11 +948,9 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_complex(ptr noalias nocapture read
; CHECK-NEXT: vstrw.32 q1, [sp, #152] @ 16-byte Spill
; CHECK-NEXT: vldrw.u32 q1, [sp, #296] @ 16-byte Reload
; CHECK-NEXT: vstrw.32 q0, [sp, #168] @ 16-byte Spill
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vmov q3, q5
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vldrw.u32 q0, [sp, #248] @ 16-byte Reload
; CHECK-NEXT: vldrw.u32 q3, [sp, #216] @ 16-byte Reload
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vstrw.32 q5, [sp, #120] @ 16-byte Spill
; CHECK-NEXT: vadd.i32 q0, q0, r0
; CHECK-NEXT: subs.w r11, r11, #16
Expand Down Expand Up @@ -1243,9 +1237,7 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado
; CHECK-NEXT: .pad #64
; CHECK-NEXT: sub sp, #64
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: str r1, [sp, #56] @ 4-byte Spill
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: str r2, [sp, #60] @ 4-byte Spill
; CHECK-NEXT: strd r1, r2, [sp, #56] @ 8-byte Folded Spill
; CHECK-NEXT: blt.w .LBB14_5
; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader
; CHECK-NEXT: adr r5, .LCPI14_3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,6 @@ define dso_local void @arm_mat_mult_q15(ptr noalias nocapture readonly %A, ptr n
; CHECK-NEXT: strd r0, r2, [sp, #24] @ 8-byte Folded Spill
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: mov r0, r3
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrne r0, [sp, #136]
; CHECK-NEXT: cmpne r0, #0
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,7 @@ define void @correlate(ptr nocapture noundef readonly %ID, ptr nocapture noundef
; CHECK-NEXT: .pad #12
; CHECK-NEXT: sub sp, #12
; CHECK-NEXT: cmp r3, #1
; CHECK-NEXT: strd r0, r1, [sp] @ 8-byte Folded Spill
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: stm.w sp, {r0, r1, r3} @ 12-byte Folded Spill
; CHECK-NEXT: blt .LBB4_12
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
; CHECK-NEXT: ldr r1, [sp, #48]
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1062,9 +1062,8 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
; CHECK-NEXT: .pad #40
; CHECK-NEXT: sub sp, #40
; CHECK-NEXT: cmp r2, #8
; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: vstr s0, [sp] @ 4-byte Spill
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: blo .LBB7_9
; CHECK-NEXT: @ %bb.1:
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/Thumb2/mve-vldst4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,13 @@ define void @vldst4(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRo
; CHECK-NEXT: vmovx.f16 s8, s27
; CHECK-NEXT: vins.f16 s12, s24
; CHECK-NEXT: vins.f16 s13, s25
; CHECK-NEXT: vins.f16 s2, s10
; CHECK-NEXT: vins.f16 s3, s11
; CHECK-NEXT: vins.f16 s1, s9
; CHECK-NEXT: vins.f16 s2, s10
; CHECK-NEXT: vins.f16 s22, s8
; CHECK-NEXT: vmov q2, q3
; CHECK-NEXT: vmov.f32 s17, s0
; CHECK-NEXT: vmov.f32 s10, s4
; CHECK-NEXT: vmov q6, q0
; CHECK-NEXT: vmov.f32 s10, s4
; CHECK-NEXT: vmov.f32 s11, s7
; CHECK-NEXT: vmov.f32 s9, s0
; CHECK-NEXT: vmov.f32 s17, s2
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/X86/optimize-max-0.ll
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,6 @@ define void @bar(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind {
; CHECK-NEXT: jb LBB1_4
; CHECK-NEXT: ## %bb.5: ## %bb9
; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: incl %ecx
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl %edi, %edx
Expand Down
5 changes: 1 addition & 4 deletions llvm/test/CodeGen/X86/tls-loads-control3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ define i32 @_Z2f2i(i32 %c) local_unnamed_addr #0 {
; HOIST0-NEXT: # %bb.1: # %while.body.preheader
; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@TLSLD(%rip), %rdi
; HOIST0-NEXT: callq __tls_get_addr@PLT
; HOIST0-NEXT: movq %rax, %rcx
; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@DTPOFF(%rax), %r15
; HOIST0-NEXT: leaq _ZZ2f2iE2st.1@DTPOFF(%rax), %r12
; HOIST0-NEXT: .p2align 4, 0x90
Expand Down Expand Up @@ -245,9 +244,7 @@ define i32 @_Z2f2i(i32 %c) local_unnamed_addr #0 {
; HOIST2-NEXT: movq %rax, %r14
; HOIST2-NEXT: addb %bpl, _ZZ2f2iE2st.0@DTPOFF(%rax)
; HOIST2-NEXT: callq _Z5gfuncv@PLT
; HOIST2-NEXT: movl %eax, %ecx
; HOIST2-NEXT: movq %r14, %rax
; HOIST2-NEXT: addl %ecx, _ZZ2f2iE2st.1@DTPOFF(%r14)
; HOIST2-NEXT: addl %eax, _ZZ2f2iE2st.1@DTPOFF(%r14)
; HOIST2-NEXT: decl %ebx
; HOIST2-NEXT: jne .LBB1_2
; HOIST2-NEXT: .LBB1_3: # %while.end
Expand Down

0 comments on commit 23d45e5

Please sign in to comment.