Skip to content

Commit

Permalink
AMDGPU/GlobalISel: Fix handling of G_ANYEXT with s1 source
Browse files Browse the repository at this point in the history
We were letting G_ANYEXT with a vcc register bank through, which was
incorrect and would select to an invalid copy. Fix this up like G_ZEXT
and G_SEXT. Also drop old code to fixup the non-boolean case in
RegBankSelect. We now have to perform that expansion during selection,
so there's no benefit to doing it during RegBankSelect.
  • Loading branch information
arsenm committed Mar 16, 2020
1 parent c460dc6 commit 80b627d
Show file tree
Hide file tree
Showing 5 changed files with 274 additions and 432 deletions.
55 changes: 19 additions & 36 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Expand Up @@ -220,7 +220,7 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
unsigned Size) const {
// TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
isVectorRegisterBank(Src)) {
(isVectorRegisterBank(Src) || Src.getID() == AMDGPU::VCCRegBankID)) {
return std::numeric_limits<unsigned>::max();
}

Expand All @@ -238,9 +238,6 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
Src.getID() == AMDGPU::VCCRegBankID))
return std::numeric_limits<unsigned>::max();

if (Src.getID() == AMDGPU::VCCRegBankID)
return std::numeric_limits<unsigned>::max();

// There is no direct copy between AGPRs.
if (Dst.getID() == AMDGPU::AGPRRegBankID &&
Src.getID() == AMDGPU::AGPRRegBankID)
Expand Down Expand Up @@ -2252,10 +2249,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_SEXT:
case AMDGPU::G_ZEXT: {
case AMDGPU::G_ZEXT:
case AMDGPU::G_ANYEXT: {
Register SrcReg = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
bool Signed = Opc == AMDGPU::G_SEXT;
const bool Signed = Opc == AMDGPU::G_SEXT;

assert(empty(OpdMapper.getVRegs(1)));

MachineIRBuilder B(MI);
const RegisterBank *SrcBank =
Expand All @@ -2282,9 +2282,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
auto ShiftAmt = B.buildConstant(S32, 31);
MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
} else {
} else if (Opc == AMDGPU::G_ZEXT) {
B.buildZExtOrTrunc(DefRegs[0], SrcReg);
B.buildConstant(DefRegs[1], 0);
} else {
B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
B.buildUndef(DefRegs[1]);
}

MRI.setRegBank(DstReg, *SrcBank);
Expand All @@ -2295,6 +2298,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (SrcTy != LLT::scalar(1))
return;

// It is not legal to have a legalization artifact with a VCC source. Rather
// than introducing a copy, insert the selcet we would have to select the
// copy to.
if (SrcBank == &AMDGPU::VCCRegBank) {
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

Expand Down Expand Up @@ -2329,24 +2335,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}

// Fixup the case with an s1 src that isn't a condition register. Use shifts
// instead of introducing a compare to avoid an unnecessary condition
// register (and since there's no scalar 16-bit compares).
auto Ext = B.buildAnyExt(DstTy, SrcReg);
auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);

if (MI.getOpcode() == AMDGPU::G_SEXT)
B.buildAShr(DstReg, Shl, ShiftAmt);
else
B.buildLShr(DstReg, Shl, ShiftAmt);

MRI.setRegBank(DstReg, *SrcBank);
MRI.setRegBank(Ext.getReg(0), *SrcBank);
MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
MRI.setRegBank(Shl.getReg(0), *SrcBank);
MI.eraseFromParent();
return;
break;
}
case AMDGPU::G_BUILD_VECTOR:
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
Expand Down Expand Up @@ -3423,17 +3412,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}

// TODO: Should anyext be split into 32-bit part as well?
if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
} else {
// Scalar extend can use 64-bit BFE, but VGPRs require extending to
// 32-bits, and then to 64.
OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
SrcSize);
}
// Scalar extend can use 64-bit BFE, but VGPRs require extending to
// 32-bits, and then to 64.
OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
SrcSize);
break;
}
case AMDGPU::G_FCMP: {
Expand Down

0 comments on commit 80b627d

Please sign in to comment.