Skip to content

Commit

Permalink
[AMDGPU] Add missing AReg classes
Browse files Browse the repository at this point in the history
Add 96-bit, 160-bit and 256-bit AReg classes to match the existing VReg and SReg classes.
No functional change (NFC) as far as I know, but it may avoid weird legalization problems.

Differential Revision: https://reviews.llvm.org/D78348
  • Loading branch information
jayfoad committed Apr 22, 2020
1 parent b2f06bd commit d625b4b
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 2 deletions.
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Expand Up @@ -798,6 +798,10 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
IsSGPR = true;
Width = 3;
} else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 3;
} else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
"trap handler registers should not be used");
Expand All @@ -816,6 +820,10 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
IsSGPR = true;
Width = 5;
} else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 5;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
"trap handler registers should not be used");
Expand All @@ -824,6 +832,10 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
IsSGPR = false;
Width = 8;
} else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 8;
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
"trap handler registers should not be used");
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
Expand Up @@ -18,5 +18,5 @@ def VGPRRegBank : RegisterBank<"VGPR",
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;

def AGPRRegBank : RegisterBank <"AGPR",
[AGPR_32, AReg_64, AReg_128, AReg_512, AReg_1024]
[AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_256, AReg_512, AReg_1024]
>;
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Expand Up @@ -1942,7 +1942,10 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
default: return -1;
case 1: return AMDGPU::AGPR_32RegClassID;
case 2: return AMDGPU::AReg_64RegClassID;
case 3: return AMDGPU::AReg_96RegClassID;
case 4: return AMDGPU::AReg_128RegClassID;
case 5: return AMDGPU::AReg_160RegClassID;
case 8: return AMDGPU::AReg_256RegClassID;
case 16: return AMDGPU::AReg_512RegClassID;
case 32: return AMDGPU::AReg_1024RegClassID;
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Expand Up @@ -930,6 +930,7 @@ unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
return AGPR_32RegClassID;
case OPW64: return AReg_64RegClassID;
case OPW128: return AReg_128RegClassID;
case OPW256: return AReg_256RegClassID;
case OPW512: return AReg_512RegClassID;
case OPW1024: return AReg_1024RegClassID;
}
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
Expand Up @@ -418,7 +418,11 @@ SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
// instructions use acc[0:1] modifier bits to distinguish. These bits are
// encoded as a virtual 9th bit of the register for these operands.
if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg))
MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg))
Enc |= 512;

return Enc;
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Expand Up @@ -10657,9 +10657,18 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 64:
RC = &AMDGPU::AReg_64RegClass;
break;
case 96:
RC = &AMDGPU::AReg_96RegClass;
break;
case 128:
RC = &AMDGPU::AReg_128RegClass;
break;
case 160:
RC = &AMDGPU::AReg_160RegClass;
break;
case 256:
RC = &AMDGPU::AReg_256RegClass;
break;
case 512:
RC = &AMDGPU::AReg_512RegClass;
break;
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Expand Up @@ -1309,8 +1309,14 @@ SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) {
return &AMDGPU::AGPR_32RegClass;
case 64:
return &AMDGPU::AReg_64RegClass;
case 96:
return &AMDGPU::AReg_96RegClass;
case 128:
return &AMDGPU::AReg_128RegClass;
case 160:
return &AMDGPU::AReg_160RegClass;
case 256:
return &AMDGPU::AReg_256RegClass;
case 512:
return &AMDGPU::AReg_512RegClass;
case 1024:
Expand Down Expand Up @@ -1362,13 +1368,16 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
&AMDGPU::AReg_64RegClass,
&AMDGPU::VReg_96RegClass,
&AMDGPU::SReg_96RegClass,
&AMDGPU::AReg_96RegClass,
&AMDGPU::VReg_128RegClass,
&AMDGPU::SReg_128RegClass,
&AMDGPU::AReg_128RegClass,
&AMDGPU::VReg_160RegClass,
&AMDGPU::SReg_160RegClass,
&AMDGPU::AReg_160RegClass,
&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,
&AMDGPU::AReg_256RegClass,
&AMDGPU::VReg_512RegClass,
&AMDGPU::SReg_512RegClass,
&AMDGPU::AReg_512RegClass,
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Expand Up @@ -519,14 +519,24 @@ def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "AGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
let Weight = 1;
}

// AGPR tuple definitions. Each record below instantiates SIRegisterTuples over
// the AGPR_32 class with getSubRegs<N>.ret as the sub-register list, where N is
// the tuple width divided by 32 (AGPR_32 declares Size = 32).
// NOTE(review): the remaining arguments (255, 1, N, "a") are presumably the
// last register index, the stride, the tuple size and the assembly name prefix;
// SIRegisterTuples is defined outside this excerpt, so confirm against it.

// AGPR 64-bit registers (N = 2)
def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">;

// AGPR 96-bit registers (N = 3)
def AGPR_96 : SIRegisterTuples<getSubRegs<3>.ret, AGPR_32, 255, 1, 3, "a">;

// AGPR 128-bit registers (N = 4)
def AGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, AGPR_32, 255, 1, 4, "a">;

// AGPR 160-bit registers (N = 5)
def AGPR_160 : SIRegisterTuples<getSubRegs<5>.ret, AGPR_32, 255, 1, 5, "a">;

// AGPR 256-bit registers (N = 8)
def AGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, AGPR_32, 255, 1, 8, "a">;

// AGPR 512-bit registers (N = 16)
def AGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, AGPR_32, 255, 1, 16, "a">;

Expand Down Expand Up @@ -752,7 +762,10 @@ class ARegClass<int numRegs, list<ValueType> regTypes, dag regList> :

// AReg_<bits> register classes, one per AGPR tuple width. Each is an ARegClass
// instantiated with (numRegs, regTypes, regList): numRegs equals <bits> / 32,
// regTypes lists the value types associated with the class, and regList adds
// the matching AGPR_<bits> tuple set.
def AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
(add AGPR_64)>;
def AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
def AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64], (add AGPR_128)>;
def AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
def AReg_256 : ARegClass<8, [v8i32, v8f32], (add AGPR_256)>;
def AReg_512 : ARegClass<16, [v16i32, v16f32], (add AGPR_512)>;
def AReg_1024 : ARegClass<32, [v32i32, v32f32], (add AGPR_1024)>;

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Expand Up @@ -1104,6 +1104,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::SGPR_96RegClassID:
case AMDGPU::SReg_96RegClassID:
case AMDGPU::VReg_96RegClassID:
case AMDGPU::AReg_96RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
Expand All @@ -1113,9 +1114,11 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::SGPR_160RegClassID:
case AMDGPU::SReg_160RegClassID:
case AMDGPU::VReg_160RegClassID:
case AMDGPU::AReg_160RegClassID:
return 160;
case AMDGPU::SReg_256RegClassID:
case AMDGPU::VReg_256RegClassID:
case AMDGPU::AReg_256RegClassID:
return 256;
case AMDGPU::SReg_512RegClassID:
case AMDGPU::VReg_512RegClassID:
Expand Down

0 comments on commit d625b4b

Please sign in to comment.