Skip to content

Commit

Permalink
[AMDGPU] fix copies between 32 and 16 bit
Browse files Browse the repository at this point in the history
This is a hack to fix illegal 32 to 16 bit copies.
The problem is that making 16 bit subregs legal creates a huge
number of failures which, without a temporary hack like this,
can only be resolved all at once.

The next step is to change operands, instruction definitions
and patterns until this hack is not needed.

Differential Revision: https://reviews.llvm.org/D79119
  • Loading branch information
rampitec committed May 4, 2020
1 parent 8303b1f commit c85eda7
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 0 deletions.
25 changes: 25 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Expand Up @@ -83,6 +83,12 @@ static cl::opt<unsigned>
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
cl::desc("Restrict range of branch instructions (DEBUG)"));

// Temporary workaround switch, enabled by default and marked ReallyHidden.
// When set, copyPhysReg replaces the 16 bit side of a copy between a 16 bit
// and a 32 bit physical register with its 32 bit register.
static cl::opt<bool>
    Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::ReallyHidden,
                   cl::init(true),
                   cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"));

SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
RI(ST), ST(ST) {
Expand Down Expand Up @@ -527,6 +533,25 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MCRegister SrcReg, bool KillSrc) const {
const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);

// FIXME: This is a hack to resolve copies between 16 bit and 32 bit
// registers until all patterns are fixed.
if (Fix16BitCopies &&
((RI.getRegSizeInBits(*RC) == 16) ^
(RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) {
MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;
MCRegister Super = RI.get32BitRegister(RegToFix);
assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);
RegToFix = Super;

if (DestReg == SrcReg) {
// Insert empty bundle since ExpandPostRA expects an instruction here.
BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
return;
}

RC = RI.getPhysRegClass(DestReg);
}

if (RC == &AMDGPU::VGPR_32RegClass) {
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
AMDGPU::SReg_32RegClass.contains(SrcReg) ||
Expand Down
36 changes: 36 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir
@@ -0,0 +1,36 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass postrapseudos -amdgpu-fix-16-bit-physreg-copies -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

# GCN-LABEL: name: lo16_to_v32
# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
name: lo16_to_v32
tracksRegLiveness: true
body: |
bb.0:
$vgpr0 = IMPLICIT_DEF
$vgpr1_lo16 = COPY $vgpr0
S_ENDPGM 0
...

# GCN-LABEL: name: v32_to_lo16
# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
name: v32_to_lo16
tracksRegLiveness: true
body: |
bb.0:
$vgpr0 = IMPLICIT_DEF
$vgpr1 = COPY $vgpr0_lo16
S_ENDPGM 0
...

# GCN-LABEL: name: samereg
# GCN: $vgpr0 = IMPLICIT_DEF
# GCN-NEXT: BUNDLE
# GCN-NEXT: S_ENDPGM
name: samereg
tracksRegLiveness: true
body: |
bb.0:
$vgpr0 = IMPLICIT_DEF
$vgpr0 = COPY $vgpr0_lo16
S_ENDPGM 0
...

0 comments on commit c85eda7

Please sign in to comment.