[AMDGPU] fix copies between 32 and 16 bit

This a hack to fix illegal 32 to 16 bit copies. The problem is when we make 16 bit subregs legal it creates a huge amount of failures which can only be resolved at once without a temporary hack like this. The next step is to change operands, instruction definitions and patterns until this hack is not needed. Differential Revision: https://reviews.llvm.org/D79119
llvm · May 4, 2020 · c85eda7 · c85eda7
1 parent 8303b1f
commit c85eda7
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 0 deletions.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -83,6 +83,12 @@ static cl::opt<unsigned>
 BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                  cl::desc("Restrict range of branch instructions (DEBUG)"));
 
+static cl::opt<bool> Fix16BitCopies(
+  "amdgpu-fix-16-bit-physreg-copies",
+  cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
+  cl::init(true),
+  cl::ReallyHidden);
+
 SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
     RI(ST), ST(ST) {
@@ -527,6 +533,25 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MCRegister SrcReg, bool KillSrc) const {
   const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
 
+  // FIXME: This is hack to resolve copies between 16 bit and 32 bit
+  // registers until all patterns are fixed.
+  if (Fix16BitCopies &&
+      ((RI.getRegSizeInBits(*RC) == 16) ^
+       (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) {
+    MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;
+    MCRegister Super = RI.get32BitRegister(RegToFix);
+    assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);
+    RegToFix = Super;
+
+    if (DestReg == SrcReg) {
+      // Insert empty bundle since ExpandPostRA expects an instruction here.
+      BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
+      return;
+    }
+
+    RC = RI.getPhysRegClass(DestReg);
+  }
+
   if (RC == &AMDGPU::VGPR_32RegClass) {
     assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
            AMDGPU::SReg_32RegClass.contains(SrcReg) ||

diff --git a/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir
@@ -0,0 +1,36 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass postrapseudos -amdgpu-fix-16-bit-physreg-copies -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: lo16_to_v32
+# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+name: lo16_to_v32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1_lo16 = COPY $vgpr0
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: v32_to_lo16
+# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+name: v32_to_lo16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = COPY $vgpr0_lo16
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: samereg
+# GCN:      $vgpr0 = IMPLICIT_DEF
+# GCN-NEXT: BUNDLE
+# GCN-NEXT: S_ENDPGM
+name: samereg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr0 = COPY $vgpr0_lo16
+    S_ENDPGM 0
+...