diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 94139b64a3e30..97cc1b1143c73 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -505,6 +505,11 @@ class MachineBasicBlock LLVM_ABI void removeLiveIn(MCRegister Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); + /// Remove every live-in entry that overlaps \p Reg. Reg and all of its + /// sub-registers are removed from the live-in set, and the lanes covered by + /// Reg are cleared from the lane mask of each live-in super-register. + LLVM_ABI void removeLiveInOverlappedWith(MCRegister Reg); + /// Return true if the specified register is in the live in set. LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask = LaneBitmask::getAll()) const; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 08a51b9b0242a..206c4e5963bf8 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -606,6 +606,22 @@ void MachineBasicBlock::removeLiveIn(MCRegister Reg, LaneBitmask LaneMask) { LiveIns.erase(I); } +void MachineBasicBlock::removeLiveInOverlappedWith(MCRegister Reg) { + const MachineFunction *MF = getParent(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + // Drop Reg itself and every sub-register of Reg from the live-in set. + for (MCPhysReg S : TRI->subregs_inclusive(Reg)) + removeLiveIn(S); + + // Clear the lanes covered by Reg from each live-in super-register as well.
+ for (MCPhysReg Super : TRI->superregs(Reg)) { + // getSubRegIndex() returns 0 when Reg is not an indexed sub-register of + // Super; there is no lane mask to clear in that case. + if (unsigned SubRegIndex = TRI->getSubRegIndex(Super, Reg)) + removeLiveIn(Super, TRI->getSubRegIndexLaneMask(SubRegIndex)); + } +} + MachineBasicBlock::livein_iterator MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) { // Get non-const version of iterator. diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 9ec5151a039b7..d5153b7fb6207 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -2187,11 +2187,9 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, const SmallVectorImpl &UsedOpsInCopy, const SmallVectorImpl &DefedRegsInCopy) { - MachineFunction &MF = *SuccBB->getParent(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); for (Register DefReg : DefedRegsInCopy) - for (MCPhysReg S : TRI->subregs_inclusive(DefReg)) - SuccBB->removeLiveIn(S); + SuccBB->removeLiveInOverlappedWith(DefReg); + for (auto U : UsedOpsInCopy) SuccBB->addLiveIn(MI->getOperand(U).getReg()); SuccBB->sortUniqueLiveIns(); diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir index c456f9c4b16e5..a2ec87053a8d5 100644 --- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir +++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir @@ -49,7 +49,7 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, 
$vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F + ; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr57 = COPY $vgpr9, implicit $exec ; GCN-NEXT: renamable $vgpr56 = COPY $vgpr8, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir b/llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir new file mode 100644 index 0000000000000..eb48ff08f1b7c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir @@ -0,0 +1,113 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s + +# Test that postra-machine-sink updates live-ins that carry a lane mask (subrange). 
+--- +name: test_postra_machine_sink_livein_update +tracksRegLiveness: true +frameInfo: + adjustsStack: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: test_postra_machine_sink_livein_update + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr44 = COPY $vgpr13, implicit $exec + ; GCN-NEXT: renamable $vgpr43 = COPY $vgpr12, implicit $exec + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr57 = COPY $vgpr9, implicit $exec + ; GCN-NEXT: renamable $vgpr56 = COPY $vgpr8, implicit $exec + ; GCN-NEXT: renamable 
$vgpr59 = COPY $vgpr7, implicit $exec + ; GCN-NEXT: renamable $vgpr58 = COPY $vgpr6, implicit $exec + ; GCN-NEXT: renamable $vgpr61 = COPY $vgpr5, implicit $exec + ; GCN-NEXT: renamable $vgpr60 = COPY $vgpr4, implicit $exec + ; GCN-NEXT: renamable $vgpr42 = COPY $vgpr3, implicit $exec + ; GCN-NEXT: renamable $vgpr41 = COPY $vgpr2, implicit $exec + ; GCN-NEXT: renamable $vgpr46 = COPY $vgpr1, implicit $exec + ; GCN-NEXT: renamable $vgpr45 = COPY $vgpr0, implicit $exec + ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31 + ; GCN-NEXT: SI_SPILL_AV64_SAVE killed $vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5) + ; GCN-NEXT: SI_SPILL_AV64_SAVE killed $vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5) + ; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0 + ; GCN-NEXT: renamable $vgpr14_vgpr15 = SI_SPILL_AV64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5) + ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GCN-NEXT: renamable $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: 
FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec + ; GCN-NEXT: FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31 + bb.0: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30_sgpr31 + + renamable $vgpr44 = COPY $vgpr13, implicit $exec + renamable $vgpr43 = COPY $vgpr12, implicit $exec + renamable $vgpr57 = COPY $vgpr9, implicit $exec + renamable $vgpr56 = COPY $vgpr8, implicit $exec + renamable $vgpr59 = COPY $vgpr7, implicit $exec + renamable $vgpr58 = COPY $vgpr6, implicit $exec + renamable $vgpr61 = COPY $vgpr5, implicit $exec + renamable $vgpr60 = COPY $vgpr4, implicit $exec + renamable $vgpr42 = COPY $vgpr3, implicit $exec + renamable $vgpr41 = COPY $vgpr2, implicit $exec + renamable $vgpr46 = COPY $vgpr1, implicit $exec + renamable $vgpr45 = COPY $vgpr0, implicit $exec + S_CBRANCH_SCC1 %bb.2, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + liveins: $sgpr30, $sgpr31, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, 
$vgpr60_vgpr61:0x000000000000000F + + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + renamable $sgpr16_sgpr17 = IMPLICIT_DEF + $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31 + SI_SPILL_AV64_SAVE killed $vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5) + SI_SPILL_AV64_SAVE killed $vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5) + dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0 + renamable $vgpr14_vgpr15 = SI_SPILL_AV64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5) + renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + renamable $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5) + FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + + bb.2: + liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F + + renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec + FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + 
S_SETPC_B64_return undef $sgpr30_sgpr31 +...