Skip to content

Commit

Permalink
[AMDGPU] Fix register class for a subreg in GCNRewritePartialRegUses.
Browse files Browse the repository at this point in the history
1. Improved the code that deduces a register class from instruction definitions. Previously, if some instruction didn't specify a register class for an operand, this was treated as having no register class information at all, even if other instructions specified the class.

2. Added a check on the required size of the resulting register, because in some cases classes with smaller registers were selected (for example VReg_1).

Reviewed By: arsenm, #amdgpu

Differential Revision: https://reviews.llvm.org/D152832
  • Loading branch information
vpykhtin committed Jul 6, 2023
1 parent 893cc97 commit 98aa843
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 56 deletions.
64 changes: 46 additions & 18 deletions llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,25 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
SubRegMap &SubRegs) const;

/// Try to find register class containing registers of minimal size for a
/// given register class RC and used subregs as keys in SubRegs by shifting
/// offsets of the subregs by RShift value to the right. If found return the
/// resulting regclass and new shifted subregs as values in SubRegs map.
/// If CoverSubregIdx isn't null it specifies covering subreg.
/// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to
/// find new regclass such that:
/// 1. It has subregs obtained by shifting each OldSubReg by RShift number
/// of bits to the right. Every "shifted" subreg should have the same
/// SubRegRC. SubRegRC can be null; in this case it is initialized using
/// getSubRegisterClass. If CoverSubregIdx is not zero it's a subreg that
/// "covers" all other subregs in pairs. Basically such subreg becomes a
/// whole register.
/// 2. Resulting register class contains registers of minimal size but not
/// less than RegNumBits.
///
/// SubRegs is a map of OldSubReg -> [SubRegRC, NewSubReg] and is used as an
/// in/out parameter:
/// OldSubReg - input parameter,
/// SubRegRC - in/out, set by this function if passed as null,
/// NewSubReg - output, contains shifted subregs on return.
const TargetRegisterClass *
getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
unsigned CoverSubregIdx,
unsigned RegNumBits, unsigned CoverSubregIdx,
SubRegMap &SubRegs) const;

/// Update live intervals after rewriting OldReg to NewReg with SubRegs map
Expand Down Expand Up @@ -207,8 +218,8 @@ const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(

const TargetRegisterClass *
GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx,
SubRegMap &SubRegs) const {
const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
unsigned CoverSubregIdx, SubRegMap &SubRegs) const {

unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
LLVM_DEBUG(dbgs() << " Shift " << RShift << ", reg align " << RCAlign
Expand All @@ -218,7 +229,13 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
for (auto &[OldSubReg, SRI] : SubRegs) {
auto &[SubRegRC, NewSubReg] = SRI;

// Instruction operand may not specify required register class (ex. COPY).
// Register class may be unknown, for example:
// undef %0.sub4:sgpr_1024 = S_MOV_B32 01
// %0.sub5:sgpr_1024 = S_MOV_B32 02
// %1:vreg_64 = COPY %0.sub4_sub5
// Register classes for subregs 'sub4' and 'sub5' are known from the
// description of destination operand of S_MOV_B32 instruction but the
// class for the subreg 'sub4_sub5' isn't specified by the COPY instruction.
if (!SubRegRC)
SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);

Expand Down Expand Up @@ -256,21 +273,26 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
// ClassMask is the set of all register classes such that each class is
// allocatable, aligned, has all shifted subregs and each subreg has required
// register class (see SubRegRC above). Now select first (that is largest)
// register class with registers of minimal size.
// register class with registers of minimal but not less than RegNumBits size.
// We have to check register size because we may encounter classes of smaller
// registers like VReg_1 in some situations.
const TargetRegisterClass *MinRC = nullptr;
unsigned MinNumBits = std::numeric_limits<unsigned>::max();
for (unsigned ClassID : ClassMask.set_bits()) {
auto *RC = TRI->getRegClass(ClassID);
unsigned NumBits = TRI->getRegSizeInBits(*RC);
if (NumBits < MinNumBits) {
if (NumBits < MinNumBits && NumBits >= RegNumBits) {
MinNumBits = NumBits;
MinRC = RC;
}
if (MinNumBits == RegNumBits)
break;
}
#ifndef NDEBUG
if (MinRC) {
assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
for (auto [SubReg, SRI] : SubRegs)
// Check that all registers in MinRC support SRI.SubReg subregister.
assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
}
#endif
Expand Down Expand Up @@ -302,7 +324,8 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
// If covering subreg is found shift everything so the covering subreg would
// be in the rightmost position.
if (CoverSubreg != AMDGPU::NoSubRegister)
return getRegClassWithShiftedSubregs(RC, Offset, CoverSubreg, SubRegs);
return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
SubRegs);

// Otherwise find subreg with maximum required alignment and shift it and all
// other subregs to the rightmost possible position with respect to the
Expand All @@ -328,7 +351,7 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
llvm_unreachable("misaligned subreg");

unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
return getRegClassWithShiftedSubregs(RC, RShift, 0, SubRegs);
return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
}

// Only the subrange's lanemasks of the original interval need to be modified.
Expand Down Expand Up @@ -406,21 +429,26 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
return false;
}

auto *RC = MRI->getRegClass(Reg);
LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
<< ':' << TRI->getRegClassName(RC) << '\n');

// Collect used subregs and constrained reg classes inferred from instruction
// operands.
SubRegMap SubRegs;
for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
assert(MO.getSubReg() != AMDGPU::NoSubRegister);
auto *OpDescRC = getOperandRegClass(MO);
const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC);
if (!Inserted) {
if (!Inserted && OpDescRC) {
SubRegInfo &SRI = I->second;
SRI.RC = TRI->getCommonSubClass(SRI.RC, OpDescRC);
SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC;
if (!SRI.RC) {
LLVM_DEBUG(dbgs() << " Couldn't find common target regclass\n");
return false;
}
}
}
auto *RC = MRI->getRegClass(Reg);
LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
<< ':' << TRI->getRegClassName(RC) << '\n');

auto *NewRC = getMinSizeReg(RC, SubRegs);
if (!NewRC) {
Expand Down
76 changes: 38 additions & 38 deletions llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
Original file line number Diff line number Diff line change
Expand Up @@ -4341,9 +4341,9 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_64_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
undef %0.sub0:sgpr_64 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand All @@ -4358,11 +4358,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_96_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 22
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_96 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand All @@ -4381,11 +4381,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_128_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_128 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand Down Expand Up @@ -4425,11 +4425,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_160_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 24
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_160 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand All @@ -4450,11 +4450,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_192_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 25
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_192 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand Down Expand Up @@ -4503,11 +4503,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_224_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 26
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_224 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand All @@ -4530,11 +4530,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_256_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 27
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_256 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand Down Expand Up @@ -4612,11 +4612,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_288_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 28
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_288 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand Down Expand Up @@ -4672,11 +4672,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_320_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 29
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_320 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand Down Expand Up @@ -4763,11 +4763,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_352_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 210
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 210
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_352 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand All @@ -4791,11 +4791,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_384_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 211
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 211
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_384 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand Down Expand Up @@ -4929,11 +4929,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_512_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 215
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 215
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_512 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand Down Expand Up @@ -5086,11 +5086,11 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_sgpr_1024_w32
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 231
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 231
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
undef %0.sub0:sgpr_1024 = S_MOV_B32 00
S_NOP 0, implicit %0.sub0
Expand Down
36 changes: 36 additions & 0 deletions llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,39 @@ body: |
S_NOP 0, implicit %4.sub1_sub2_sub3_sub4_sub5_sub6
S_NOP 0, implicit %4.sub3_sub4_sub5_sub6_sub7_sub8
...
---
name: test_subregs_unknown_regclass_from_instructions
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_subregs_unknown_regclass_from_instructions
; CHECK: undef %2.sub0:sgpr_64 = S_MOV_B32 1
; CHECK-NEXT: %2.sub1:sgpr_64 = S_MOV_B32 2
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_64 = COPY %2
undef %0.sub4:sgpr_1024 = S_MOV_B32 01
%0.sub5:sgpr_1024 = S_MOV_B32 02
%1:vreg_64 = COPY %0.sub4_sub5
...
---
name: test_subregs_unknown_regclass_from_instructions_sgpr_1024_to_sgpr_64
tracksRegLiveness: true
registers:
- { id: 0, class: sgpr_1024 }
body: |
bb.0:
; CHECK-LABEL: name: test_subregs_unknown_regclass_from_instructions_sgpr_1024_to_sgpr_64
; CHECK: dead [[COPY:%[0-9]+]]:vreg_64 = COPY undef %2:sgpr_64
%1:vreg_64 = COPY undef %0.sub4_sub5
...
---
name: test_subregs_regclass_defined_by_dst_operand_sreg_64_xexec
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test_subregs_regclass_defined_by_dst_operand_sreg_64_xexec
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
undef %0.sub2_sub3:sgpr_128 = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
%2:vreg_64 = COPY %0.sub2_sub3:sgpr_128
...

0 comments on commit 98aa843

Please sign in to comment.