diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 6b87681528eeb..5f770d6b0bc03 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1768,24 +1768,27 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. - if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { - if (!DstInt->hasSubRanges()) { - BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); - LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); - LaneBitmask UnusedLanes = FullMask & ~UsedLanes; - DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt); - // The unused lanes are just empty live-ranges at this point. - // It is the caller responsibility to set the proper - // dead segments if there is an actual dead def of the - // unused lanes. This may happen with rematerialization. - DstInt->createSubRange(Allocator, UnusedLanes); + if (MO.isUse() && !DstIsPhys) { + unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg()); + if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) { + if (!DstInt->hasSubRanges()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask UnusedLanes = FullMask & ~UsedLanes; + DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt); + // The unused lanes are just empty live-ranges at this point. + // It is the caller's responsibility to set the proper + // dead segments if there is an actual dead def of the + // unused lanes. This may happen with rematerialization. + DstInt->createSubRange(Allocator, UnusedLanes); + } + SlotIndex MIIdx = UseMI->isDebugValue() + ? 
LIS->getSlotIndexes()->getIndexBefore(*UseMI) + : LIS->getInstructionIndex(*UseMI); + SlotIndex UseIdx = MIIdx.getRegSlot(true); + addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx); } - SlotIndex MIIdx = UseMI->isDebugValue() - ? LIS->getSlotIndexes()->getIndexBefore(*UseMI) - : LIS->getInstructionIndex(*UseMI); - SlotIndex UseIdx = MIIdx.getRegSlot(true); - addUndefFlag(*DstInt, UseIdx, MO, SubIdx); } if (DstIsPhys) diff --git a/llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir b/llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir new file mode 100644 index 0000000000000..c60d8297f57da --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir @@ -0,0 +1,81 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -verify-coalescing -run-pass=simple-register-coalescing -o - %s | FileCheck %s + +# The copy from %0 to %1 introduces liveness for %1.sub2. After +# coalescing, the use of %1.sub2 needs to be marked undef. The +# subregless copy previously did not consider the existing subregister +# on the use operand. + +--- +name: undef_subreg_use_after_full_copy_coalesce_0 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: undef_subreg_use_after_full_copy_coalesce_0 + ; CHECK: undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: dead %0.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_ENDPGM 0, implicit undef %0.sub2 + undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec + %0.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec + %1:vreg_96 = COPY killed %0 + S_ENDPGM 0, implicit %1.sub2 + +... + +# Same, except coalesced copy has a subregister index that needs to be +# composed with the use index. 
+--- +name: undef_subreg_use_after_full_copy_coalesce_composed +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: undef_subreg_use_after_full_copy_coalesce_composed + ; CHECK: undef %0.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: dead %0.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: S_ENDPGM 0, implicit undef %2.sub1:vreg_64 + undef %0.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec + %0.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec + %1:vreg_128 = COPY killed %0 + %2:vreg_64 = COPY killed %1.sub2_sub3 + S_ENDPGM 0, implicit %2.sub1 + +... + +# FIXME: Initial computed range is wrong for %0.sub2_sub3 and fails +# verifier. +# --- +# name: undef_subreg_use_after_full_copy_coalesce_composed2 +# tracksRegLiveness: true +# body: | +# bb.0: +# undef %0.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec +# %0.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec +# %1:vreg_128 = COPY killed %0.sub2_sub3 +# S_ENDPGM 0, implicit %1.sub1 + +# ... + +--- +name: undef_subreg_use_after_full_copy_coalesce_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: undef_subreg_use_after_full_copy_coalesce_1 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: undef %2.sub0:vreg_96 = COPY $vgpr0 + ; CHECK: %2.sub1:vreg_96 = COPY $vgpr1 + ; CHECK: S_NOP 0, implicit undef %2.sub2 + ; CHECK: S_NOP 0, implicit %2.sub1 + ; CHECK: S_ENDPGM 0 + %0:vgpr_32 = COPY killed $vgpr0 + %1:vgpr_32 = COPY killed $vgpr1 + undef %2.sub0:vreg_96 = COPY killed %0 + %2.sub1:vreg_96 = COPY killed %1 + %3:vreg_96 = COPY killed %2 + S_NOP 0, implicit %3.sub2 + S_NOP 0, implicit %3.sub1 + S_ENDPGM 0 + +...