diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 65c067e4874b1..8dc6781fcb018 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -411,6 +411,7 @@ class MachineCopyPropagation : public MachineFunctionPass { typedef enum { DebugUse = false, RegularUse = true } DebugType; void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT); + void readSuccessorLiveIns(const MachineBasicBlock &MBB); void ForwardCopyPropagateBlock(MachineBasicBlock &MBB); void BackwardCopyPropagateBlock(MachineBasicBlock &MBB); void EliminateSpillageCopies(MachineBasicBlock &MBB); @@ -463,6 +464,22 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader, } } +void MachineCopyPropagation::readSuccessorLiveIns( + const MachineBasicBlock &MBB) { + if (MaybeDeadCopies.empty()) + return; + + // If a copy result is livein to a successor, it is not dead. + for (const MachineBasicBlock *Succ : MBB.successors()) { + for (const auto &LI : Succ->liveins()) { + for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) { + if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI)) + MaybeDeadCopies.remove(Copy); + } + } + } +} + /// Return true if \p PreviousCopy did copy register \p Src to register \p Def. /// This fact may have been obscured by sub register usage or may not be true at /// all even though Src and Def are subregisters of the registers used in @@ -914,10 +931,17 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } - // If MBB doesn't have successors, delete the copies whose defs are not used. - // If MBB does have successors, then conservative assume the defs are live-out - // since we don't want to trust live-in lists. 
- if (MBB.succ_empty()) { + bool TracksLiveness = MRI->tracksLiveness(); + + // If liveness is tracked, we can use the live-in lists to know which + // copies aren't dead. + if (TracksLiveness) + readSuccessorLiveIns(MBB); + + // If MBB doesn't have successors, delete copies whose defs are not used. + // If MBB does have successors, we can only delete copies if we are able to + // use liveness information from successors to confirm they are really dead. + if (MBB.succ_empty() || TracksLiveness) { for (MachineInstr *MaybeDead : MaybeDeadCopies) { LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; MaybeDead->dump()); diff --git a/llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir b/llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir index 23cf1dcda839e..5b379c2bd5629 100644 --- a/llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir +++ b/llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir @@ -10,7 +10,6 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0, implicit $w0 ; CHECK-NEXT: $w8 = ORRWrs $wzr, $w0, 0, implicit-def $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll index 932b230726a3a..934ff44900c04 100644 --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -147,10 +147,10 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: mov v19.16b, v23.16b ; CHECK-NEXT: mov v3.d[1], x20 ; CHECK-NEXT: mov v23.16b, v27.16b -; CHECK-NEXT: mov v27.16b, v9.16b -; CHECK-NEXT: mul x15, x4, x5 ; CHECK-NEXT: add v27.2d, v9.2d, v1.2d +; CHECK-NEXT: mul x15, x4, x5 ; CHECK-NEXT: str q11, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov v11.16b, v15.16b ; CHECK-NEXT: mov v4.d[1], x22 ; CHECK-NEXT: add v19.2d, v19.2d, v1.2d ; CHECK-NEXT: add v7.2d, v7.2d, 
v1.2d @@ -171,9 +171,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: mov v10.16b, v26.16b ; CHECK-NEXT: mov v14.d[1], x13 ; CHECK-NEXT: mov v22.16b, v31.16b -; CHECK-NEXT: mov v20.16b, v8.16b ; CHECK-NEXT: ldp q26, q31, [sp] // 32-byte Folded Reload -; CHECK-NEXT: mov v11.16b, v15.16b ; CHECK-NEXT: mov v0.d[1], x12 ; CHECK-NEXT: add v13.2d, v13.2d, v14.2d ; CHECK-NEXT: add v31.2d, v31.2d, v14.2d diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll index 1ebd864e7e03a..29704959fc176 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll @@ -477,7 +477,6 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac ; GFX1032-NEXT: s_cbranch_execz .LBB1_3 ; GFX1032-NEXT: ; %bb.2: ; GFX1032-NEXT: v_mov_b32_e32 v0, s11 -; GFX1032-NEXT: s_mov_b32 s10, s11 ; GFX1032-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc ; GFX1032-NEXT: .LBB1_3: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 @@ -615,7 +614,6 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac ; GFX1132-NEXT: s_cbranch_execz .LBB1_3 ; GFX1132-NEXT: ; %bb.2: ; GFX1132-NEXT: v_mov_b32_e32 v0, s11 -; GFX1132-NEXT: s_mov_b32 s10, s11 ; GFX1132-NEXT: buffer_atomic_add_u32 v0, off, s[4:7], 0 glc ; GFX1132-NEXT: .LBB1_3: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s9 diff --git a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll index c8278e58ad064..8748767501bd0 100644 --- a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll +++ b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll @@ -29,9 +29,7 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar ; CHECK-NEXT: nop ; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-NEXT: cmpwi 3, 0 -; CHECK-NEXT: crmove 20, 10 ; 
CHECK-NEXT: crorc 20, 10, 2 -; CHECK-NEXT: crmove 21, 2 ; CHECK-NEXT: bc 4, 20, .LBB0_4 ; CHECK-NEXT: # %bb.2: # %if.end5 ; CHECK-NEXT: addis 3, 2, .L.str@toc@ha @@ -76,11 +74,9 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar ; BE-NEXT: addi 3, 31, 128 ; BE-NEXT: bl _setjmp ; BE-NEXT: nop -; BE-NEXT: crmove 20, 10 ; BE-NEXT: # kill: def $r3 killed $r3 killed $x3 ; BE-NEXT: cmpwi 3, 0 ; BE-NEXT: crorc 20, 10, 2 -; BE-NEXT: crmove 21, 2 ; BE-NEXT: bc 4, 20, .LBB0_4 ; BE-NEXT: # %bb.2: # %if.end5 ; BE-NEXT: addis 3, 2, .L.str@toc@ha diff --git a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll index 767b7028a967c..a0f8374e074d9 100644 --- a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll +++ b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll @@ -42,9 +42,8 @@ define i64 @loopif(ptr nocapture readonly %x, i32 %y, i32 %n) { ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: blt .LBB1_4 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph -; CHECK-NEXT: mov lr, r2 -; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: dls lr, r2 +; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: .p2align 2 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll index 4ab569777b2ad..93cab25c2cb72 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll @@ -542,9 +542,7 @@ define arm_aapcs_vfpcc void @gather_inc_v8i16_simple(ptr noalias nocapture reado ; CHECK-NEXT: .pad #28 ; CHECK-NEXT: sub sp, #28 ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r1, r2 -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: strd r1, r2, [sp, #4] @ 8-byte Folded Spill ; CHECK-NEXT: blt .LBB11_5 ; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload @@ -661,9 +659,7 @@ define arm_aapcs_vfpcc void 
@gather_inc_v8i16_complex(ptr noalias nocapture read ; CHECK-NEXT: .pad #136 ; CHECK-NEXT: sub sp, #136 ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: str r1, [sp, #64] @ 4-byte Spill -; CHECK-NEXT: mov r1, r2 -; CHECK-NEXT: str r2, [sp, #68] @ 4-byte Spill +; CHECK-NEXT: strd r1, r2, [sp, #64] @ 8-byte Folded Spill ; CHECK-NEXT: blt.w .LBB12_5 ; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: ldr r1, [sp, #68] @ 4-byte Reload @@ -952,11 +948,9 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_complex(ptr noalias nocapture read ; CHECK-NEXT: vstrw.32 q1, [sp, #152] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q1, [sp, #296] @ 16-byte Reload ; CHECK-NEXT: vstrw.32 q0, [sp, #168] @ 16-byte Spill -; CHECK-NEXT: vmov q0, q2 -; CHECK-NEXT: vmov q3, q5 -; CHECK-NEXT: vadd.i32 q1, q1, r0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #248] @ 16-byte Reload ; CHECK-NEXT: vldrw.u32 q3, [sp, #216] @ 16-byte Reload +; CHECK-NEXT: vadd.i32 q1, q1, r0 ; CHECK-NEXT: vstrw.32 q5, [sp, #120] @ 16-byte Spill ; CHECK-NEXT: vadd.i32 q0, q0, r0 ; CHECK-NEXT: subs.w r11, r11, #16 @@ -1243,9 +1237,7 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado ; CHECK-NEXT: .pad #64 ; CHECK-NEXT: sub sp, #64 ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-NEXT: mov r1, r2 -; CHECK-NEXT: str r2, [sp, #60] @ 4-byte Spill +; CHECK-NEXT: strd r1, r2, [sp, #56] @ 8-byte Folded Spill ; CHECK-NEXT: blt.w .LBB14_5 ; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: adr r5, .LCPI14_3 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll index 18c8a8a22ef22..7b8b884576d13 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -609,7 +609,6 @@ define dso_local void @arm_mat_mult_q15(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: strd r0, r2, [sp, #24] @ 8-byte Folded Spill ; 
CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r0, r3 ; CHECK-NEXT: itt ne ; CHECK-NEXT: ldrne r0, [sp, #136] ; CHECK-NEXT: cmpne r0, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll index 9987ff940b5aa..77980be905207 100644 --- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll +++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll @@ -108,9 +108,7 @@ define void @correlate(ptr nocapture noundef readonly %ID, ptr nocapture noundef ; CHECK-NEXT: .pad #12 ; CHECK-NEXT: sub sp, #12 ; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: strd r0, r1, [sp] @ 8-byte Folded Spill -; CHECK-NEXT: mov r1, r3 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: stm.w sp, {r0, r1, r3} @ 12-byte Folded Spill ; CHECK-NEXT: blt .LBB4_12 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph ; CHECK-NEXT: ldr r1, [sp, #48] diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index 82a186bcc73d6..c03339b52f264 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -1062,9 +1062,8 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 ; CHECK-NEXT: cmp r2, #8 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: vstr s0, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: blo .LBB7_9 ; CHECK-NEXT: @ %bb.1: diff --git a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll index 219541cffb940..2e51e9e059f65 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll @@ -95,14 +95,13 @@ define void @vldst4(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRo ; CHECK-NEXT: vmovx.f16 s8, s27 ; CHECK-NEXT: vins.f16 
s12, s24 ; CHECK-NEXT: vins.f16 s13, s25 +; CHECK-NEXT: vins.f16 s2, s10 ; CHECK-NEXT: vins.f16 s3, s11 ; CHECK-NEXT: vins.f16 s1, s9 -; CHECK-NEXT: vins.f16 s2, s10 ; CHECK-NEXT: vins.f16 s22, s8 ; CHECK-NEXT: vmov q2, q3 -; CHECK-NEXT: vmov.f32 s17, s0 -; CHECK-NEXT: vmov.f32 s10, s4 ; CHECK-NEXT: vmov q6, q0 +; CHECK-NEXT: vmov.f32 s10, s4 ; CHECK-NEXT: vmov.f32 s11, s7 ; CHECK-NEXT: vmov.f32 s9, s0 ; CHECK-NEXT: vmov.f32 s17, s2 diff --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll index 1bd427c4a4b0c..81dafdffe3116 100644 --- a/llvm/test/CodeGen/X86/optimize-max-0.ll +++ b/llvm/test/CodeGen/X86/optimize-max-0.ll @@ -489,7 +489,6 @@ define void @bar(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind { ; CHECK-NEXT: jb LBB1_4 ; CHECK-NEXT: ## %bb.5: ## %bb9 ; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: incl %ecx ; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl %edi, %edx diff --git a/llvm/test/CodeGen/X86/tls-loads-control3.ll b/llvm/test/CodeGen/X86/tls-loads-control3.ll index 82daac5a9bae3..4e521b1c696a4 100644 --- a/llvm/test/CodeGen/X86/tls-loads-control3.ll +++ b/llvm/test/CodeGen/X86/tls-loads-control3.ll @@ -183,7 +183,6 @@ define i32 @_Z2f2i(i32 %c) local_unnamed_addr #0 { ; HOIST0-NEXT: # %bb.1: # %while.body.preheader ; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@TLSLD(%rip), %rdi ; HOIST0-NEXT: callq __tls_get_addr@PLT -; HOIST0-NEXT: movq %rax, %rcx ; HOIST0-NEXT: leaq _ZZ2f2iE2st.0@DTPOFF(%rax), %r15 ; HOIST0-NEXT: leaq _ZZ2f2iE2st.1@DTPOFF(%rax), %r12 ; HOIST0-NEXT: .p2align 4, 0x90 @@ -245,9 +244,7 @@ define i32 @_Z2f2i(i32 %c) local_unnamed_addr #0 { ; HOIST2-NEXT: movq %rax, %r14 ; HOIST2-NEXT: addb %bpl, _ZZ2f2iE2st.0@DTPOFF(%rax) ; HOIST2-NEXT: callq _Z5gfuncv@PLT -; HOIST2-NEXT: movl %eax, %ecx -; HOIST2-NEXT: movq %r14, %rax -; HOIST2-NEXT: addl %ecx, _ZZ2f2iE2st.1@DTPOFF(%r14) +; HOIST2-NEXT: addl %eax, _ZZ2f2iE2st.1@DTPOFF(%r14) ; HOIST2-NEXT: decl 
%ebx ; HOIST2-NEXT: jne .LBB1_2 ; HOIST2-NEXT: .LBB1_3: # %while.end