Skip to content

Commit

Permalink
[AMDGPU][GlobalISel] Check exact width in get*ClassForBitWidth and wi…
Browse files Browse the repository at this point in the history
…den if necessary

Instead of checking whether the given bit width is less than or equal to the bit width of an existing RegClass,
check whether it is exactly equal to it.

LLVM vector types that don't have a corresponding register class are widened during legalization.
This applies to G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT and G_BUILD_VECTOR.

Differential revision: https://reviews.llvm.org/D148096
Reviewers: foad, arsenm
  • Loading branch information
Mateja Marjanovic committed May 3, 2023
1 parent 6175ec0 commit cf76074
Show file tree
Hide file tree
Showing 5 changed files with 541 additions and 353 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4835,6 +4835,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
case TargetOpcode::G_EXTRACT:
if (TypeIdx != 1)
return UnableToLegalize;
Expand All @@ -4843,6 +4844,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INSERT:
case TargetOpcode::G_INSERT_VECTOR_ELT:
case TargetOpcode::G_FREEZE:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
Expand Down
44 changes: 42 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,28 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
};
}

// Build a mutation that widens the vector type at TypeIdx by adding elements
// until its total bit width is one for which
// SIRegisterInfo::getSGPRClassForBitWidth reports a register class. The
// element type is left unchanged.
static LegalizeMutation moreElementsToNextExistingRegClass(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const unsigned StartNumElts = Ty.getNumElements();
    const unsigned EltSize = Ty.getElementType().getSizeInBits();
    // Upper bound on the element count: a vector of this many elements is
    // exactly MaxRegisterSize bits wide.
    const unsigned EltCountLimit = MaxRegisterSize / EltSize;

    assert(EltSize == 32 || EltSize == 64);
    assert(Ty.getSizeInBits() < MaxRegisterSize);

    // Starting from the current element count, step up one element at a time
    // and stop at the first width that has a register class. If none is found
    // below the limit, the result is the limit itself.
    unsigned WidenedNumElts = StartNumElts;
    while (WidenedNumElts < EltCountLimit &&
           !SIRegisterInfo::getSGPRClassForBitWidth(WidenedNumElts * EltSize))
      ++WidenedNumElts;

    return std::pair(TypeIdx, LLT::fixed_vector(WidenedNumElts, EltSize));
  };
}

static LLT getBitcastRegisterType(const LLT Ty) {
const unsigned Size = Ty.getSizeInBits();

Expand Down Expand Up @@ -215,6 +237,15 @@ static LegalityPredicate isRegisterType(unsigned TypeIdx) {
};
}

// Predicate that holds when the type at TypeIdx passes isRegisterType() but
// SIRegisterInfo::getSGPRClassForBitWidth has no register class for its exact
// size in bits.
static LegalityPredicate isIllegalRegisterType(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];

    // Types that are not register types at all are not of interest here.
    if (!isRegisterType(QueryTy))
      return false;

    // A register type is "illegal" when no class of matching width exists.
    return SIRegisterInfo::getSGPRClassForBitWidth(QueryTy.getSizeInBits()) ==
           nullptr;
  };
}

static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
Expand Down Expand Up @@ -1455,10 +1486,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT VecTy = Query.Types[VecTypeIdx];
const LLT IdxTy = Query.Types[IdxTypeIdx];
const unsigned EltSize = EltTy.getSizeInBits();
const bool isLegalVecType =
!!SIRegisterInfo::getSGPRClassForBitWidth(VecTy.getSizeInBits());
return (EltSize == 32 || EltSize == 64) &&
VecTy.getSizeInBits() % 32 == 0 &&
VecTy.getSizeInBits() <= MaxRegisterSize &&
IdxTy.getSizeInBits() == 32;
IdxTy.getSizeInBits() == 32 &&
isLegalVecType;
})
.bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltNarrowerThan(VecTypeIdx, 32)),
bitcastToVectorElement32(VecTypeIdx))
Expand All @@ -1484,6 +1518,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(IdxTypeIdx, S32, S32)
.clampMaxNumElements(VecTypeIdx, S32, 32)
// TODO: Clamp elements for 64-bit vectors?
.moreElementsIf(
isIllegalRegisterType(VecTypeIdx),
moreElementsToNextExistingRegClass(VecTypeIdx))
// It should only be necessary with variable indexes.
// As a last resort, lower to the stack
.lower();
Expand Down Expand Up @@ -1538,7 +1575,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalForCartesianProduct(AllS64Vectors, {S64})
.clampNumElements(0, V16S32, V32S32)
.clampNumElements(0, V2S64, V16S64)
.fewerElementsIf(isWideVec16(0), changeTo(0, V2S16));
.fewerElementsIf(isWideVec16(0), changeTo(0, V2S16))
.moreElementsIf(
isIllegalRegisterType(0),
moreElementsToNextExistingRegClass(0));

if (ST.hasScalarPackInsts()) {
BuildVector
Expand Down
Loading

0 comments on commit cf76074

Please sign in to comment.