diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 029f05d0b5072..a6fc9b1ee011c 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -1833,6 +1833,18 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) { return false; } +/// Return true if the register \p VRM assigns to \p Intf overlaps \p PhysReg +/// (per \p TRI) without being identical to it; an exact match yields false. +static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, + const VirtRegMap &VRM, + MCRegister PhysReg, + const LiveInterval &Intf) { + MCRegister AssignedReg = VRM.getPhys(Intf.reg()); + if (PhysReg == AssignedReg) + return false; + return TRI.regsOverlap(PhysReg, AssignedReg); +} + /// mayRecolorAllInterferences - Check if the virtual registers that /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be /// recolored to free \p PhysReg. @@ -1858,12 +1870,20 @@ bool RAGreedy::mayRecolorAllInterferences( return false; } for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { - // If Intf is done and sit on the same register class as VirtReg, - // it would not be recolorable as it is in the same state as VirtReg. - // However, if VirtReg has tied defs and Intf doesn't, then + // If Intf is done and sits on the same register class as VirtReg, it + // would not be recolorable as it is in the same state as + // VirtReg. However, there are at least two exceptions. + // + // If VirtReg has tied defs and Intf doesn't, then // there is still a point in examining if it can be recolorable. + // + // Additionally, if the register class has overlapping tuple members, it + // may still be recolorable using a different tuple. This is more likely + // if the existing assignment aliases with the candidate. 
+ // if (((ExtraInfo->getStage(*Intf) == RS_Done && - MRI->getRegClass(Intf->reg()) == CurRC) && + MRI->getRegClass(Intf->reg()) == CurRC && + !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) && !(hasTiedDef(MRI, VirtReg.reg()) && !hasTiedDef(MRI, Intf->reg()))) || FixedRegisters.count(Intf->reg())) { diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir new file mode 100644 index 0000000000000..09be927dc952e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir @@ -0,0 +1,84 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=greedy -o - %s | FileCheck %s + +# This testcase is restricted to a maximum of 24 VGPRs, so at most 3 +# vreg_256s can be allocated at a time. The register class appears to +# have more members than that, but each tuple overlaps the next one. +# Allocating a vreg_64 therefore prevents a third full vreg_256 from +# being live at the same point. + +# The hints try to force allocation of overlapping vreg_256 tuples, +# which cannot be satisfied. The last S_NOP in %bb.0, which uses 2 +# vreg_256s and a vreg_64, can be satisfied as long as the hints are ignored. + +# With the resulting allocation order, this ends up using last-chance +# recoloring for a vreg_256. We should still try to recolor completed +# virtual registers of the same class, since the existing assignment +# can only be corrected by moving it to a non-overlapping register. 
+ +--- | + define void @recolor_impossible_hint() #0 { + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="10,10" } +--- + +--- +name: recolor_impossible_hint +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_256, preferred-register: '$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7' } + - { id: 1, class: vreg_256, preferred-register: '$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8' } + - { id: 2, class: vreg_256, preferred-register: '$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9' } + - { id: 3, class: vreg_256, preferred-register: '$vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10' } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + occupancy: 10 +body: | + ; CHECK-LABEL: name: recolor_impossible_hint + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0, implicit-def %7, implicit-def %19, implicit-def %5 + ; CHECK-NEXT: SI_SPILL_V256_SAVE %19, %stack.3, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.3, align 4, addrspace 5) + ; CHECK-NEXT: SI_SPILL_V256_SAVE %7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: SI_SPILL_V256_SAVE %5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: S_NOP 0, implicit-def %17 + ; CHECK-NEXT: SI_SPILL_V256_SAVE %17, %stack.2, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: S_NOP 0, implicit-def %4 + ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5) + ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE]], 
implicit [[SI_SPILL_V256_RESTORE1]], implicit %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[SI_SPILL_V256_RESTORE1]] + ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0, implicit [[COPY]] + ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE2]] + ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE3]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + S_NOP 0, implicit-def %0:vreg_256, implicit-def %1:vreg_256, implicit-def %2:vreg_256 + S_NOP 0, implicit-def %3:vreg_256 + S_NOP 0, implicit-def %4:vreg_64 + S_NOP 0, implicit %0, implicit %1, implicit %4 + S_CBRANCH_EXECNZ %bb.3, implicit $exec + + bb.2: + S_NOP 0, implicit %1 + S_NOP 0, implicit %2 + S_NOP 0, implicit %3 + + bb.3: + S_ENDPGM 0 + +...