diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index c70620fd75328..38493edf0af13 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -529,19 +529,12 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
                   | getInternalReadRegState(!FirstCopy), SubIdx)
       .addReg(FromReg, 0, SubIdx);
 
-  BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
   SlotIndexes &Indexes = *LIS.getSlotIndexes();
   if (FirstCopy) {
     Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
   } else {
     CopyMI->bundleWithPred();
   }
-  LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
-  DestLI.refineSubRanges(Allocator, LaneMask,
-                         [Def, &Allocator](LiveInterval::SubRange &SR) {
-                           SR.createDeadDef(Def, Allocator);
-                         },
-                         Indexes, TRI);
 
   return Def;
 }
@@ -549,11 +542,11 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
     LaneBitmask LaneMask, MachineBasicBlock &MBB,
     MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
   const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+  SlotIndexes &Indexes = *LIS.getSlotIndexes();
   if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
     // The full vreg is copied.
     MachineInstr *CopyMI =
         BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
-    SlotIndexes &Indexes = *LIS.getSlotIndexes();
     return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
   }
 
@@ -567,18 +560,26 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
   const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
   assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
 
-  SmallVector<unsigned, 8> Indexes;
+  SmallVector<unsigned, 8> SubIndexes;
 
   // Abort if we cannot possibly implement the COPY with the given indexes.
-  if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes))
+  if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, SubIndexes))
     report_fatal_error("Impossible to implement partial COPY");
 
   SlotIndex Def;
-  for (unsigned BestIdx : Indexes) {
+  for (unsigned BestIdx : SubIndexes) {
     Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
                                 DestLI, Late, Def);
   }
 
+  BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+  DestLI.refineSubRanges(
+      Allocator, LaneMask,
+      [Def, &Allocator](LiveInterval::SubRange &SR) {
+        SR.createDeadDef(Def, Allocator);
+      },
+      Indexes, TRI);
+
   return Def;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
new file mode 100644
index 0000000000000..6a855c29b7b44
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+
+# Initially %2 starts out with 2 subranges (one for sub0, and one for
+# the rest of the lanes). After %2 is split, the newly created register
+# ends up with a different set of lane masks after refineSubRanges, as
+# the copy bundle uses two different defs to cover the register. The
+# fix is to call refineSubRanges only after all COPYs are inserted.
+ +--- +name: subrange_for_this_mask_not_found +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + occupancy: 7 +body: | + ; CHECK-LABEL: name: subrange_for_this_mask_not_found + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK: SI_SPILL_V1024_SAVE [[DEF1]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_NOP 0, implicit [[DEF1]] + ; CHECK: S_NOP 0, implicit [[DEF1]] + ; CHECK: [[DEF2:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[SI_SPILL_V1024_RESTORE:%[0-9]+]]:vreg_1024_align2 = SI_SPILL_V1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; CHECK: undef %5.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:vreg_1024_align2 = COPY [[SI_SPILL_V1024_RESTORE]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK: internal %5.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31:vreg_1024_align2 = COPY [[SI_SPILL_V1024_RESTORE]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; CHECK: } + ; CHECK: %5.sub0:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK: S_NOP 0, implicit %5.sub0 + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: S_NOP 0, implicit %5 + ; CHECK: bb.4: + ; CHECK: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; CHECK: [[DEF2:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK: bb.5: + ; CHECK: undef %3.sub0:vreg_1024_align2 = COPY [[DEF]] + ; CHECK: S_NOP 0, implicit %3 + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:vreg_1024_align2 = IMPLICIT_DEF + %2:vreg_1024_align2 = COPY %1 + + bb.1: + S_NOP 0, implicit %1 + S_NOP 0, implicit %1 + %1:vreg_1024_align2 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + + bb.3: + %2.sub0:vreg_1024_align2 = IMPLICIT_DEF + S_NOP 0, implicit %2.sub0 + + bb.4: + S_NOP 0, implicit %2 + + bb.5: + %2:vreg_1024_align2 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc + + bb.6: + undef %4.sub0:vreg_1024_align2 = COPY %0 + S_NOP 0, implicit %4 +...
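For intuition, below is a small standalone C++ sketch of the lane-mask splitting that refineSubRanges performs. It is a toy model, not LLVM's implementation: the masks are plain uint32_t values standing in for LaneBitmask, and the refine() helper and concrete mask values are illustrative assumptions chosen to mirror the test above (one subrange for sub0, one for the remaining lanes, covered by two partial copies). It shows why refining once per partial COPY, as buildSingleSubRegCopy previously did, can leave the split register with a finer set of subrange lane masks than a single refinement over the full copied mask.

// Toy model of LiveInterval::refineSubRanges lane-mask splitting, to
// illustrate why the call must happen once, after all partial copies
// are built. Not LLVM's data structures: masks are plain uint32_t
// bitsets standing in for LaneBitmask.
#include <cstdint>
#include <cstdio>
#include <vector>

// Split every subrange mask so each one lies either fully inside or
// fully outside Mask, mirroring the splitting rule of refineSubRanges.
static void refine(std::vector<uint32_t> &Subranges, uint32_t Mask) {
  std::vector<uint32_t> Result;
  for (uint32_t SR : Subranges) {
    if ((SR & Mask) && (SR & ~Mask)) { // SR straddles Mask: split it.
      Result.push_back(SR & Mask);
      Result.push_back(SR & ~Mask);
    } else {
      Result.push_back(SR);
    }
  }
  Subranges = Result;
}

static void dump(const char *Label, const std::vector<uint32_t> &SRs) {
  std::printf("%s:", Label);
  for (uint32_t SR : SRs)
    std::printf(" %#x", SR);
  std::printf("\n");
}

int main() {
  // Like %2 in the test: one subrange for sub0 (0x1), one for the
  // remaining lanes (0xE).
  std::vector<uint32_t> PerCopy = {0x1, 0xE};
  std::vector<uint32_t> Once = PerCopy;

  // The copy bundle covers the register with two partial copies.
  uint32_t CopyMasks[] = {0x3, 0xC};

  // Old behavior: refine per copy. The first partial mask splits the
  // wide subrange, yielding {0x1, 0x2, 0xC}.
  for (uint32_t M : CopyMasks)
    refine(PerCopy, M);
  dump("per-copy refinement", PerCopy);

  // New behavior: one refinement with the union of the copied lanes,
  // after all COPYs are inserted. Both existing subranges already lie
  // inside 0xF, so the set {0x1, 0xE} is unchanged.
  refine(Once, 0xF);
  dump("single refinement  ", Once);
  return 0;
}

Running the model prints "per-copy refinement: 0x1 0x2 0xc" versus "single refinement: 0x1 0xe": refining against each partial mask in turn splits the wide subrange and produces a lane-mask set that no longer matches the parent interval, which is exactly the mismatch the reordered refineSubRanges call avoids.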